From 7f3cb5cbaa73cbd5097e2b104e777f543399e5c3 Mon Sep 17 00:00:00 2001 From: Karl Cordes Date: Tue, 23 Dec 2025 15:54:26 +1100 Subject: [PATCH] Add scripts to fix emojibake corruption --- scripts/VAULT_PERMISSIONS_FIX.md | 250 +++++++++++++++ scripts/fix_all_corruption.sql | 347 +++++++++++++++++++++ scripts/fix_corrupted_data.py | 222 +++++++++++++ scripts/fix_corrupted_data.sql | 69 +++- scripts/fix_corrupted_data_hex.sql | 221 +++++++++++++ scripts/fix_corrupted_data_safe.sh | 97 ++++++ scripts/fix_products_corruption.sql | 291 +++++++++++++++++ scripts/run_comprehensive_fix.sh | 115 +++++++ scripts/run_corruption_fix.sh | 26 +- scripts/scan_all_tables_for_corruption.sql | 126 ++++++++ 10 files changed, 1758 insertions(+), 6 deletions(-) create mode 100644 scripts/VAULT_PERMISSIONS_FIX.md create mode 100644 scripts/fix_all_corruption.sql create mode 100755 scripts/fix_corrupted_data.py create mode 100644 scripts/fix_corrupted_data_hex.sql create mode 100755 scripts/fix_corrupted_data_safe.sh create mode 100644 scripts/fix_products_corruption.sql create mode 100755 scripts/run_comprehensive_fix.sh create mode 100644 scripts/scan_all_tables_for_corruption.sql diff --git a/scripts/VAULT_PERMISSIONS_FIX.md b/scripts/VAULT_PERMISSIONS_FIX.md new file mode 100644 index 00000000..0930427e --- /dev/null +++ b/scripts/VAULT_PERMISSIONS_FIX.md @@ -0,0 +1,250 @@ +# Vault Permissions Fix - Attachment Directory Issue + +## Problem + +The PHP container (`cmc-prod-php`) could not read attachments written by the Go vault because they were being written to **different directories**. 
+ +### Root Cause Analysis + +**Go Vault Container (`cmc-prod-go`):** +- Was writing attachments to: `/var/www/emails/` (default `--emaildir` flag) +- Host directory: `/home/cmc/files/emails` +- Running as: `root` user (UID 0) + +**PHP Application Container (`cmc-prod-php`):** +- Reads attachments from: `/var/www/cmc-sales/app/webroot/attachments_files/` +- Host directory: `/home/cmc/files/attachments_files` +- Running as: `www-data` user (UID 33 or 82) + +### Two Issues Identified + +1. **Different Directory Paths** (Primary Issue) ❌ + - Go wrote to `/var/www/emails` → Host: `/home/cmc/files/emails` + - PHP read from `/var/www/cmc-sales/app/webroot/attachments_files` → Host: `/home/cmc/files/attachments_files` + - These are **completely separate directories on the host**! + +2. **File Permissions** (Secondary) ⚠️ + - Files: `0644` (`-rw-r--r--`) - World readable ✅ + - Directories: `0755` (`drwxr-xr-x`) - World readable/executable ✅ + - Owner: `root:root` (from Go container) + - Reader: `www-data` (from PHP container) + - **Verdict**: Permissions were actually OK (world-readable), but wrong directory! + +## Solution Implemented + +### Changes Made + +**1. Updated Docker Compose ([docker-compose.prod.yml](../docker-compose.prod.yml))** + +Added the `attachments_files` mount to the Go container: + +```yaml +cmc-prod-go: + volumes: + - /home/cmc/files/attachments_files:/var/www/attachments_files # ← ADDED + - /home/cmc/files/emails:/var/www/emails + - /home/cmc/files/vault:/var/www/vault + - /home/cmc/files/vaultmsgs:/var/www/vaultmsgs +``` + +**2. 
Updated Vault Cron Script ([scripts/vault-cron-prod.sh](vault-cron-prod.sh))** + +Changed the `--emaildir` flag from `/var/www/emails` to `/var/www/attachments_files`, so the vault writes where PHP reads: + +```bash +docker exec -t "$CONTAINER_NAME" ./vault --mode=local \ + --vaultdir=/var/www/vaultmsgs/new \ + --processeddir=/var/www/vaultmsgs/cur \ + --emaildir=/var/www/attachments_files \ + --dbhost=cmc-prod-db \ + --dbuser=cmc \ + --dbpass="$DB_PASSWORD" \ + --dbname=cmc +``` + +Do not put a comment after a trailing `\`: it ends the command at that line. Supply the database password through the `DB_PASSWORD` environment variable instead of writing it into the script or this document. + +**3. Updated Documentation** + +- [VAULT_QUICKSTART.md](VAULT_QUICKSTART.md) - Updated all examples +- [VAULT_DEPLOYMENT.md](VAULT_DEPLOYMENT.md) - Updated deployment instructions + +## Verification Steps + +After deploying the fix, verify it works: + +### Step 1: Rebuild and Restart Containers + +```bash +cd ~/src/cmc-sales +docker compose -f docker-compose.prod.yml up -d cmc-prod-go +``` + +### Step 2: Verify Mount Points + +```bash +# Check Go container can see the directory +docker exec -it cmc-prod-go ls -la /var/www/attachments_files + +# Check PHP container can see the directory +docker exec -it cmc-prod-php ls -la /var/www/cmc-sales/app/webroot/attachments_files +``` + +Both should show the same files (same inode on host). 
+ +### Step 3: Test Vault Processing + +```bash +# Run vault manually +~/scripts/vault-cron-prod.sh + +# Check if files were written +ls -la /home/cmc/files/attachments_files/$(date +%m-%Y)/ + +# Check file permissions +ls -l /home/cmc/files/attachments_files/$(date +%m-%Y)/ | head -5 +``` + +Expected output: +``` +-rw-r--r-- 1 root root 1234 Nov 23 10:00 abc123-texthtml +-rw-r--r-- 1 root root 5678 Nov 23 10:00 abc123-document.pdf +``` + +### Step 4: Verify PHP Can Read Files + +```bash +# Enter PHP container as www-data user +docker exec -it -u www-data cmc-prod-php sh + +# Try to read an attachment file +cd /var/www/cmc-sales/app/webroot/attachments_files/$(date +%m-%Y) +ls -la +cat abc123-texthtml # Should display content +exit +``` + +### Step 5: Check Database + +```bash +# Verify attachments are in database with correct paths +docker exec -it cmc-prod-db mariadb -u cmc -p cmc -e " +SELECT id, email_id, filename, name, size +FROM email_attachments +ORDER BY created DESC +LIMIT 5; +" +``` + +The `name` column should contain paths like: `11-2025/abc123-filename.pdf` + +## File Permissions Breakdown + +The current permissions (`0644` for files, `0755` for dirs) are correct: + +### File Permissions: `0644` + +``` +-rw-r--r-- +│││││││││ +│││└─┴─┴─── Other (everyone): read (r--) +││└─┬─┴───── Group: read (r--) +│└─┬┴─────── Owner: read + write (rw-) +└─────────── Regular file (-) +``` + +**Result**: ✅ All users (including www-data) can **read** the files + +### Directory Permissions: `0755` + +``` +drwxr-xr-x +││││││││││ +│││└─┴─┴─── Other: read + execute (r-x) - can list & access +││└─┬─┴───── Group: read + execute (r-x) +│└─┬┴─────── Owner: read + write + execute (rwx) +└─────────── Directory (d) +``` + +**Result**: ✅ All users can **navigate into** and **list** the directories + +### Why This Works + +1. **Files are world-readable** (`r--` in other bits) +2. **Directories are world-executable** (`x` in other bits) +3. 
Even though owner is `root:root`, the "other" permissions allow `www-data` to read + +### When You'd Need to Change Permissions + +You would **only** need different permissions if: + +| Scenario | Change To | Reason | +|----------|-----------|--------| +| Only owner should read | `0600` / `0700` | Security/privacy | +| Group needs write access | `0664` / `0775` | Collaborative editing | +| Everyone needs write | `0666` / `0777` | ⚠️ **NOT recommended** - security risk | + +For your use case: **Keep `0644` / `0755`** ✅ + +## Database Path Storage + +The vault stores relative paths in the database: + +```sql +-- email_attachments table +name: "11-2025/abc123-filename.pdf" +``` + +The PHP application prepends the base path: + +```php +$fullPath = "/var/www/cmc-sales/app/webroot/attachments_files/" . $attachment['name']; +// Results in: /var/www/cmc-sales/app/webroot/attachments_files/11-2025/abc123-filename.pdf +``` + +This is why the directory mount must be correct! + +## Alternative Solutions (Not Implemented) + +### Alternative 1: Symlink (Not Needed) + +Could create a symlink from `/var/www/emails` → `/var/www/attachments_files`, but this adds complexity. 
+ +### Alternative 2: Change User ID (Overkill) + +Could run Go container as `www-data` user, but: +- Requires rebuilding the Dockerfile +- Adds unnecessary complexity +- Current permissions already work + +### Alternative 3: Set Group Permissions (Unnecessary) + +Could use `0664` / `0775` and shared group, but: +- World-readable permissions already work +- No need for write access +- Simpler is better + +## Summary + +**What was fixed:** +- ✅ Go vault now writes to `/var/www/attachments_files` (same as PHP reads from) +- ✅ Added volume mount for `attachments_files` directory +- ✅ Updated all scripts and documentation + +**What didn't need fixing:** +- ✅ File permissions (`0644`) are already world-readable +- ✅ Directory permissions (`0755`) are already world-accessible +- ✅ No user/group changes needed + +**Root cause:** +- Wrong directory path, NOT permissions issue + +## Deployment Checklist + +- [ ] Pull latest code with fixes +- [ ] Restart Go container (`docker compose up -d cmc-prod-go`) +- [ ] Verify mount points in both containers +- [ ] Run vault manually to test +- [ ] Check files appear in `/home/cmc/files/attachments_files/` +- [ ] Verify PHP can read the files +- [ ] Check database paths are correct +- [ ] Enable cron job +- [ ] Monitor logs for 24 hours diff --git a/scripts/fix_all_corruption.sql b/scripts/fix_all_corruption.sql new file mode 100644 index 00000000..7000c2dd --- /dev/null +++ b/scripts/fix_all_corruption.sql @@ -0,0 +1,347 @@ +-- ============================================================================ +-- Fix Character Corruption in ALL Tables +-- ============================================================================ +-- This script fixes mojibake across the entire database +-- Run this AFTER running scan_all_tables_for_corruption.sql to see what's affected +-- +-- IMPORTANT: Create a backup first! 
-- docker exec cmc-db mariadb-dump -u root -p"$MARIADB_ROOT_PASSWORD" --default-character-set=utf8mb4 cmc | gzip > backup_before_complete_fix_$(date +%Y%m%d).sql.gz
--
-- To run: docker exec -i cmc-db mariadb -u root -p"$MARIADB_ROOT_PASSWORD" --default-character-set=utf8mb4 cmc < scripts/fix_all_corruption.sql
-- (export MARIADB_ROOT_PASSWORD in the calling shell; keep the real password out of the repository)
-- ============================================================================

-- The mojibake literals below are UTF-8 text, so the session must be utf8mb4.
SET NAMES utf8mb4;

SELECT '============================================================' AS '';
SELECT 'COMPREHENSIVE CORRUPTION FIX - ALL TABLES' AS '';
SELECT '============================================================' AS '';

-- ============================================================================
-- How the statements in this script are written
-- ============================================================================
-- * One UPDATE per (table, column, pattern). Each is guarded by a LIKE on the
--   same pattern, so only rows that contain the pattern are rewritten.
-- * Only the columns and patterns listed per table are touched; other columns
--   and other mojibake sequences (accented capitals, ellipsis, ...) are not.
-- * The patterns are UTF-8 byte sequences rendered as cp1252 text, e.g. the
--   three bytes of U+2019 (E2 80 99) show up as "a-circumflex, euro sign,
--   trade mark sign".
-- * A literal apostrophe is written '''' (four quotes). Three quotes ('''),
--   start a string that never terminates and abort the script.
-- * Four patterns end in a character that is invisible or easily normalised by
--   editors and terminals, so they are spelled as explicit UTF-8 bytes with a
--   character set introducer instead of as text:
--
--     right double quote U+201D : _utf8mb4 X'C3A2E282ACC29D'    -> "
--     en dash            U+2013 : _utf8mb4 X'C3A2E282ACE2809C'  -> U+2013
--     em dash            U+2014 : _utf8mb4 X'C3A2E282ACE2809D'  -> U+2014
--     zero-width space   U+200B : _utf8mb4 X'C3A2E282ACE280B9'  -> (removed)
--
--   Matching the full three-character sequence also prevents the bare
--   two-character prefix from rewriting the start of the other sequences.
--
-- Patterns written as plain text (all characters printable):
--   smart apostrophe, left double quote, e-acute, u-umlaut, o-acute, n-tilde,
--   c-cedilla, sharp s, degree sign, registered sign, trade mark sign.

-- ============================================================================
-- ADDRESSES Table
-- ============================================================================
SELECT 'Fixing addresses table...' AS '';

UPDATE addresses SET name = REPLACE(name, '’', '''') WHERE name LIKE '%’%';
UPDATE addresses SET name = REPLACE(name, '“', '"') WHERE name LIKE '%“%';
UPDATE addresses SET name = REPLACE(name, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE name LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE addresses SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%';
UPDATE addresses SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%';

UPDATE addresses SET address = REPLACE(address, '’', '''') WHERE address LIKE '%’%';
UPDATE addresses SET address = REPLACE(address, '“', '"') WHERE address LIKE '%“%';
UPDATE addresses SET address = REPLACE(address, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE address LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE addresses SET address = REPLACE(address, _utf8mb4 X'C3A2E282ACE2809C', '–') WHERE address LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACE2809C', '%');
UPDATE addresses SET address = REPLACE(address, _utf8mb4 X'C3A2E282ACE2809D', '—') WHERE address LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACE2809D', '%');
UPDATE addresses SET address = REPLACE(address, 'é', 'é') WHERE address LIKE '%é%';
UPDATE addresses SET address = REPLACE(address, 'ü', 'ü') WHERE address LIKE '%ü%';
UPDATE addresses SET address = REPLACE(address, 'ó', 'ó') WHERE address LIKE '%ó%';
UPDATE addresses SET address = REPLACE(address, 'ñ', 'ñ') WHERE address LIKE '%ñ%';

UPDATE addresses SET city = REPLACE(city, 'é', 'é') WHERE city LIKE '%é%';
UPDATE addresses SET city = REPLACE(city, 'ü', 'ü') WHERE city LIKE '%ü%';
UPDATE addresses SET city = REPLACE(city, 'ó', 'ó') WHERE city LIKE '%ó%';

-- ============================================================================
-- ATTACHMENTS Table
-- ============================================================================
SELECT 'Fixing attachments table...' AS '';

UPDATE attachments SET name = REPLACE(name, '’', '''') WHERE name LIKE '%’%';
UPDATE attachments SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%';
UPDATE attachments SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%';

UPDATE attachments SET description = REPLACE(description, '’', '''') WHERE description LIKE '%’%';
UPDATE attachments SET description = REPLACE(description, '“', '"') WHERE description LIKE '%“%';
UPDATE attachments SET description = REPLACE(description, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE description LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE attachments SET description = REPLACE(description, 'é', 'é') WHERE description LIKE '%é%';
UPDATE attachments SET description = REPLACE(description, 'ü', 'ü') WHERE description LIKE '%ü%';

-- ============================================================================
-- CONTACTS Table
-- ============================================================================
SELECT 'Fixing contacts table...' AS '';

UPDATE contacts SET name = REPLACE(name, '’', '''') WHERE name LIKE '%’%';
UPDATE contacts SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%';
UPDATE contacts SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%';
UPDATE contacts SET name = REPLACE(name, 'ó', 'ó') WHERE name LIKE '%ó%';

UPDATE contacts SET first_name = REPLACE(first_name, '’', '''') WHERE first_name LIKE '%’%';
UPDATE contacts SET first_name = REPLACE(first_name, 'é', 'é') WHERE first_name LIKE '%é%';
UPDATE contacts SET first_name = REPLACE(first_name, 'ü', 'ü') WHERE first_name LIKE '%ü%';

UPDATE contacts SET last_name = REPLACE(last_name, '’', '''') WHERE last_name LIKE '%’%';
UPDATE contacts SET last_name = REPLACE(last_name, 'é', 'é') WHERE last_name LIKE '%é%';
UPDATE contacts SET last_name = REPLACE(last_name, 'ü', 'ü') WHERE last_name LIKE '%ü%';

UPDATE contacts SET notes = REPLACE(notes, '’', '''') WHERE notes LIKE '%’%';
UPDATE contacts SET notes = REPLACE(notes, '“', '"') WHERE notes LIKE '%“%';
UPDATE contacts SET notes = REPLACE(notes, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE notes LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE contacts SET notes = REPLACE(notes, 'é', 'é') WHERE notes LIKE '%é%';
UPDATE contacts SET notes = REPLACE(notes, 'ü', 'ü') WHERE notes LIKE '%ü%';

UPDATE contacts SET job_title = REPLACE(job_title, '’', '''') WHERE job_title LIKE '%’%';
UPDATE contacts SET job_title = REPLACE(job_title, 'é', 'é') WHERE job_title LIKE '%é%';

-- ============================================================================
-- CUSTOMERS Table (re-apply in case some were missed)
-- ============================================================================
SELECT 'Fixing customers table...' AS '';

UPDATE customers SET name = REPLACE(name, '’', '''') WHERE name LIKE '%’%';
UPDATE customers SET name = REPLACE(name, '“', '"') WHERE name LIKE '%“%';
UPDATE customers SET name = REPLACE(name, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE name LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE customers SET name = REPLACE(name, _utf8mb4 X'C3A2E282ACE2809C', '–') WHERE name LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACE2809C', '%');
UPDATE customers SET name = REPLACE(name, _utf8mb4 X'C3A2E282ACE2809D', '—') WHERE name LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACE2809D', '%');
-- Zero-width space mojibake: removed rather than replaced.
UPDATE customers SET name = REPLACE(name, _utf8mb4 X'C3A2E282ACE280B9', '') WHERE name LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACE280B9', '%');
UPDATE customers SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%';
UPDATE customers SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%';
UPDATE customers SET name = REPLACE(name, 'ó', 'ó') WHERE name LIKE '%ó%';
UPDATE customers SET name = REPLACE(name, 'ç', 'ç') WHERE name LIKE '%ç%';

UPDATE customers SET trading_name = REPLACE(trading_name, '’', '''') WHERE trading_name LIKE '%’%';
UPDATE customers SET trading_name = REPLACE(trading_name, 'é', 'é') WHERE trading_name LIKE '%é%';

UPDATE customers SET notes = REPLACE(notes, '’', '''') WHERE notes LIKE '%’%';
UPDATE customers SET notes = REPLACE(notes, '“', '"') WHERE notes LIKE '%“%';
UPDATE customers SET notes = REPLACE(notes, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE notes LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE customers SET notes = REPLACE(notes, 'é', 'é') WHERE notes LIKE '%é%';

-- ============================================================================
-- DOCUMENTS Table
-- ============================================================================
SELECT 'Fixing documents table...' AS '';

UPDATE documents SET shipping_details = REPLACE(shipping_details, '’', '''') WHERE shipping_details LIKE '%’%';
UPDATE documents SET shipping_details = REPLACE(shipping_details, 'é', 'é') WHERE shipping_details LIKE '%é%';

UPDATE documents SET bill_to = REPLACE(bill_to, '’', '''') WHERE bill_to LIKE '%’%';
UPDATE documents SET bill_to = REPLACE(bill_to, 'é', 'é') WHERE bill_to LIKE '%é%';

UPDATE documents SET ship_to = REPLACE(ship_to, '’', '''') WHERE ship_to LIKE '%’%';
UPDATE documents SET ship_to = REPLACE(ship_to, 'é', 'é') WHERE ship_to LIKE '%é%';

UPDATE documents SET subject = REPLACE(subject, '’', '''') WHERE subject LIKE '%’%';
UPDATE documents SET subject = REPLACE(subject, '“', '"') WHERE subject LIKE '%“%';
UPDATE documents SET subject = REPLACE(subject, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE subject LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE documents SET subject = REPLACE(subject, 'é', 'é') WHERE subject LIKE '%é%';

-- ============================================================================
-- EMAILS Table
-- ============================================================================
SELECT 'Fixing emails table...' AS '';

-- `from` and `to` are reserved words, hence the backticks.
UPDATE emails SET `from` = REPLACE(`from`, '’', '''') WHERE `from` LIKE '%’%';
UPDATE emails SET `from` = REPLACE(`from`, 'é', 'é') WHERE `from` LIKE '%é%';

UPDATE emails SET `to` = REPLACE(`to`, '’', '''') WHERE `to` LIKE '%’%';
UPDATE emails SET `to` = REPLACE(`to`, 'é', 'é') WHERE `to` LIKE '%é%';

UPDATE emails SET subject = REPLACE(subject, '’', '''') WHERE subject LIKE '%’%';
UPDATE emails SET subject = REPLACE(subject, '“', '"') WHERE subject LIKE '%“%';
UPDATE emails SET subject = REPLACE(subject, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE subject LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE emails SET subject = REPLACE(subject, 'é', 'é') WHERE subject LIKE '%é%';
UPDATE emails SET subject = REPLACE(subject, 'ü', 'ü') WHERE subject LIKE '%ü%';

UPDATE emails SET body = REPLACE(body, '’', '''') WHERE body LIKE '%’%';
UPDATE emails SET body = REPLACE(body, '“', '"') WHERE body LIKE '%“%';
UPDATE emails SET body = REPLACE(body, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE body LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE emails SET body = REPLACE(body, _utf8mb4 X'C3A2E282ACE2809C', '–') WHERE body LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACE2809C', '%');
UPDATE emails SET body = REPLACE(body, _utf8mb4 X'C3A2E282ACE2809D', '—') WHERE body LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACE2809D', '%');
UPDATE emails SET body = REPLACE(body, 'é', 'é') WHERE body LIKE '%é%';
UPDATE emails SET body = REPLACE(body, 'ü', 'ü') WHERE body LIKE '%ü%';
UPDATE emails SET body = REPLACE(body, '°', '°') WHERE body LIKE '%°%';
UPDATE emails SET body = REPLACE(body, '®', '®') WHERE body LIKE '%®%';

-- ============================================================================
-- ENQUIRIES Table
-- ============================================================================
SELECT 'Fixing enquiries table...' AS '';

UPDATE enquiries SET comments = REPLACE(comments, '’', '''') WHERE comments LIKE '%’%';
UPDATE enquiries SET comments = REPLACE(comments, '“', '"') WHERE comments LIKE '%“%';
UPDATE enquiries SET comments = REPLACE(comments, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE comments LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE enquiries SET comments = REPLACE(comments, 'é', 'é') WHERE comments LIKE '%é%';
UPDATE enquiries SET comments = REPLACE(comments, 'ü', 'ü') WHERE comments LIKE '%ü%';

-- ============================================================================
-- INVOICES Table
-- ============================================================================
SELECT 'Fixing invoices table...' AS '';

UPDATE invoices SET ship_via = REPLACE(ship_via, '’', '''') WHERE ship_via LIKE '%’%';
UPDATE invoices SET ship_via = REPLACE(ship_via, 'é', 'é') WHERE ship_via LIKE '%é%';

UPDATE invoices SET comments = REPLACE(comments, '’', '''') WHERE comments LIKE '%’%';
UPDATE invoices SET comments = REPLACE(comments, '“', '"') WHERE comments LIKE '%“%';
UPDATE invoices SET comments = REPLACE(comments, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE comments LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE invoices SET comments = REPLACE(comments, 'é', 'é') WHERE comments LIKE '%é%';

-- ============================================================================
-- JOBS Table
-- ============================================================================
SELECT 'Fixing jobs table...' AS '';

UPDATE jobs SET comments = REPLACE(comments, '’', '''') WHERE comments LIKE '%’%';
UPDATE jobs SET comments = REPLACE(comments, '“', '"') WHERE comments LIKE '%“%';
UPDATE jobs SET comments = REPLACE(comments, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE comments LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE jobs SET comments = REPLACE(comments, 'é', 'é') WHERE comments LIKE '%é%';

-- ============================================================================
-- LINE_ITEMS Table
-- ============================================================================
SELECT 'Fixing line_items table...' AS '';

UPDATE line_items SET name = REPLACE(name, '’', '''') WHERE name LIKE '%’%';
UPDATE line_items SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%';
UPDATE line_items SET name = REPLACE(name, '°', '°') WHERE name LIKE '%°%';
UPDATE line_items SET name = REPLACE(name, '®', '®') WHERE name LIKE '%®%';

UPDATE line_items SET description = REPLACE(description, '’', '''') WHERE description LIKE '%’%';
UPDATE line_items SET description = REPLACE(description, '“', '"') WHERE description LIKE '%“%';
UPDATE line_items SET description = REPLACE(description, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE description LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE line_items SET description = REPLACE(description, 'é', 'é') WHERE description LIKE '%é%';
UPDATE line_items SET description = REPLACE(description, '°', '°') WHERE description LIKE '%°%';
UPDATE line_items SET description = REPLACE(description, '®', '®') WHERE description LIKE '%®%';

-- ============================================================================
-- ORDER_ACKNOWLEDGEMENTS Table
-- ============================================================================
SELECT 'Fixing order_acknowledgements table...' AS '';

UPDATE order_acknowledgements SET comments = REPLACE(comments, '’', '''') WHERE comments LIKE '%’%';
UPDATE order_acknowledgements SET comments = REPLACE(comments, '“', '"') WHERE comments LIKE '%“%';
UPDATE order_acknowledgements SET comments = REPLACE(comments, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE comments LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE order_acknowledgements SET comments = REPLACE(comments, 'é', 'é') WHERE comments LIKE '%é%';

-- ============================================================================
-- PRINCIPLES Table (re-apply)
-- ============================================================================
SELECT 'Fixing principles table...' AS '';

UPDATE principles SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%';
UPDATE principles SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%';
UPDATE principles SET name = REPLACE(name, 'ó', 'ó') WHERE name LIKE '%ó%';

UPDATE principles SET address = REPLACE(address, 'ß', 'ß') WHERE address LIKE '%ß%';
UPDATE principles SET address = REPLACE(address, 'ü', 'ü') WHERE address LIKE '%ü%';
UPDATE principles SET address = REPLACE(address, 'é', 'é') WHERE address LIKE '%é%';
UPDATE principles SET address = REPLACE(address, 'ñ', 'ñ') WHERE address LIKE '%ñ%';

UPDATE principles SET city = REPLACE(city, 'ü', 'ü') WHERE city LIKE '%ü%';
UPDATE principles SET city = REPLACE(city, 'é', 'é') WHERE city LIKE '%é%';

-- ============================================================================
-- PRODUCTS Table (re-apply with all patterns)
-- ============================================================================
SELECT 'Fixing products table...' AS '';

-- Mojibake
UPDATE products SET title = REPLACE(title, '°', '°') WHERE title LIKE '%°%';
UPDATE products SET title = REPLACE(title, '®', '®') WHERE title LIKE '%®%';
UPDATE products SET title = REPLACE(title, 'â„¢', '™') WHERE title LIKE '%â„¢%';
UPDATE products SET title = REPLACE(title, '’', '''') WHERE title LIKE '%’%';
UPDATE products SET title = REPLACE(title, '“', '"') WHERE title LIKE '%“%';
UPDATE products SET title = REPLACE(title, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE title LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');

UPDATE products SET description = REPLACE(description, '°', '°') WHERE description LIKE '%°%';
UPDATE products SET description = REPLACE(description, '®', '®') WHERE description LIKE '%®%';
UPDATE products SET description = REPLACE(description, 'â„¢', '™') WHERE description LIKE '%â„¢%';
UPDATE products SET description = REPLACE(description, '’', '''') WHERE description LIKE '%’%';

UPDATE products SET item_description = REPLACE(item_description, '°', '°') WHERE item_description LIKE '%°%';
UPDATE products SET item_description = REPLACE(item_description, '®', '®') WHERE item_description LIKE '%®%';

-- HTML entities
-- NOTE(review): the search strings had been reduced to the characters they
-- decode to, which made these statements no-ops. The entity names below are
-- reconstructed from the replacement values; confirm against the stored data.
-- &amp; is decoded last so that text such as "&amp;deg;" is unescaped only once.
UPDATE products SET title = REPLACE(title, '&deg;', '°') WHERE title LIKE '%&deg;%';
UPDATE products SET title = REPLACE(title, '&nbsp;', ' ') WHERE title LIKE '%&nbsp;%';
UPDATE products SET title = REPLACE(title, '&amp;', '&') WHERE title LIKE '%&amp;%';

UPDATE products SET description = REPLACE(description, '&deg;', '°') WHERE description LIKE '%&deg;%';
UPDATE products SET description = REPLACE(description, '&nbsp;', ' ') WHERE description LIKE '%&nbsp;%';
UPDATE products SET description = REPLACE(description, '&amp;', '&') WHERE description LIKE '%&amp;%';

-- ============================================================================
-- PURCHASE_ORDERS Table
-- ============================================================================
SELECT 'Fixing purchase_orders table...' AS '';

UPDATE purchase_orders SET ship_via = REPLACE(ship_via, '’', '''') WHERE ship_via LIKE '%’%';
UPDATE purchase_orders SET ship_via = REPLACE(ship_via, 'é', 'é') WHERE ship_via LIKE '%é%';

UPDATE purchase_orders SET comments = REPLACE(comments, '’', '''') WHERE comments LIKE '%’%';
UPDATE purchase_orders SET comments = REPLACE(comments, '“', '"') WHERE comments LIKE '%“%';
UPDATE purchase_orders SET comments = REPLACE(comments, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE comments LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE purchase_orders SET comments = REPLACE(comments, 'é', 'é') WHERE comments LIKE '%é%';

-- ============================================================================
-- QUOTES Table
-- ============================================================================
SELECT 'Fixing quotes table...' AS '';

UPDATE quotes SET comments = REPLACE(comments, '’', '''') WHERE comments LIKE '%’%';
UPDATE quotes SET comments = REPLACE(comments, '“', '"') WHERE comments LIKE '%“%';
UPDATE quotes SET comments = REPLACE(comments, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE comments LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE quotes SET comments = REPLACE(comments, 'é', 'é') WHERE comments LIKE '%é%';

UPDATE quotes SET notes = REPLACE(notes, '’', '''') WHERE notes LIKE '%’%';
UPDATE quotes SET notes = REPLACE(notes, '“', '"') WHERE notes LIKE '%“%';
UPDATE quotes SET notes = REPLACE(notes, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE notes LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE quotes SET notes = REPLACE(notes, 'é', 'é') WHERE notes LIKE '%é%';

-- ============================================================================
-- SHIPMENTS Table
-- ============================================================================
SELECT 'Fixing shipments table...' AS '';

UPDATE shipments SET ship_via = REPLACE(ship_via, '’', '''') WHERE ship_via LIKE '%’%';
UPDATE shipments SET ship_via = REPLACE(ship_via, 'é', 'é') WHERE ship_via LIKE '%é%';

UPDATE shipments SET comments = REPLACE(comments, '’', '''') WHERE comments LIKE '%’%';
UPDATE shipments SET comments = REPLACE(comments, '“', '"') WHERE comments LIKE '%“%';
UPDATE shipments SET comments = REPLACE(comments, _utf8mb4 X'C3A2E282ACC29D', '"') WHERE comments LIKE CONCAT('%', _utf8mb4 X'C3A2E282ACC29D', '%');
UPDATE shipments SET comments = REPLACE(comments, 'é', 'é') WHERE comments LIKE '%é%';

-- ============================================================================
-- Verification
-- ============================================================================
SELECT '' AS '';
SELECT '============================================================' AS '';
SELECT 'FIX COMPLETE - Verification' AS '';
SELECT '============================================================' AS '';

-- This script only prints a pointer; the actual check is the separate scan script.
SELECT 'Run scripts/scan_all_tables_for_corruption.sql to see remaining corruption' AS '';

-- (patch residue, kept verbatim) diff --git a/scripts/fix_corrupted_data.py b/scripts/fix_corrupted_data.py new file
# (patch residue, kept verbatim) mode 100755 index 00000000..211a964f --- /dev/null +++ b/scripts/fix_corrupted_data.py @@ -0,0 +1,222 @@
#!/usr/bin/env python3
"""
Fix Corrupted Character Data in CMC Database
Uses Python to properly handle UTF-8 encoding

Every statement is executed through ``docker exec <container> mariadb -e``,
so each call to run_sql() is its own client session.
"""

import os
import subprocess
import sys

DB_USER = "root"
# Prefer supplying the password through the environment; the literal default is
# kept only so existing invocations keep working.
DB_PASS = os.environ.get("CMC_DB_PASS", "secureRootPassword")
DB_NAME = "cmc"
CONTAINER = "cmc-db"

# UTF-8 byte sequences mis-decoded as ISO-8859-1 (bytes 0x80-0x9F become the
# C1 control characters U+0080-U+009F), mapped to the intended character.
_LATIN1_MOJIBAKE = {
    # Smart quotes and punctuation
    '\u00e2\u0080\u0099': '\u2019',  # right single quotation mark
    '\u00e2\u0080\u009c': '\u201c',  # left double quotation mark
    '\u00e2\u0080\u009d': '\u201d',  # right double quotation mark
    '\u00e2\u0080\u0093': '\u2013',  # en dash
    '\u00e2\u0080\u0094': '\u2014',  # em dash
    '\u00e2\u0080\u008b': '',        # zero-width space, removed
    '\u00e2\u0080\u0083': ' ',       # em space, replaced with a regular space

    # Accented characters
    '\u00c3\u00a9': '\u00e9',  # e acute
    '\u00c3\u0089': '\u00c9',  # E acute
    '\u00c3\u00b3': '\u00f3',  # o acute
    '\u00c3\u00ad': '\u00ed',  # i acute
    '\u00c3\u00a7': '\u00e7',  # c cedilla
    '\u00c3\u00bc': '\u00fc',  # u umlaut
    '\u00c3\u00a1': '\u00e1',  # a acute
    '\u00c3\u00b1': '\u00f1',  # n tilde
    '\u00c3\u00b6': '\u00f6',  # o umlaut
    '\u00c3\u00b4': '\u00f4',  # o circumflex
    '\u00c3\u009f': '\u00df',  # German sharp s

    # Turkish characters
    '\u00c4\u00b0': '\u0130',  # capital I with dot above
    '\u00c5\u009e': '\u015e',  # capital S cedilla
    '\u00c4\u009e': '\u011e',  # capital G breve

    # Czech characters
    '\u00c4\u008c': '\u010c',  # capital C caron
    '\u00c4\u009b': '\u011b',  # e caron
    '\u00c4\u008d': '\u010d',  # c caron
    '\u00c5\u00be': '\u017e',  # z caron
    '\u00c5\u00a1': '\u0161',  # s caron
    '\u00c5\u0099': '\u0159',  # r caron
    '\u00c5\u0088': '\u0148',  # n caron

    # Symbols
    '\u00c2\u00b0': '\u00b0',        # degree sign
    '\u00c2\u00ae': '\u00ae',        # registered sign
    '\u00e2\u0084\u00a2': '\u2122',  # trade mark sign
}


def _cp1252_variant(latin1_mojibake):
    """Return the same mis-decoded bytes as Windows-1252 would render them.

    Bytes 0x80-0x9F are printable in cp1252 (0x80 is the euro sign, 0x99 the
    trade mark sign), which is the form the sibling SQL scripts match on.
    The five bytes cp1252 leaves undefined fall back to the Latin-1 code point.
    """
    rendered = []
    for byte in latin1_mojibake.encode('latin-1'):
        try:
            rendered.append(bytes([byte]).decode('cp1252'))
        except UnicodeDecodeError:
            rendered.append(chr(byte))
    return ''.join(rendered)


# Mapping of corrupted patterns to correct values.
# Holds every ISO-8859-1 pattern plus its cp1252 rendering, because which of
# the two is stored depends on how the data was mis-decoded.
CORRUPTION_PATTERNS = dict(_LATIN1_MOJIBAKE)
for _pattern, _replacement in _LATIN1_MOJIBAKE.items():
    CORRUPTION_PATTERNS.setdefault(_cp1252_variant(_pattern), _replacement)


def _sql_escape(value):
    """Double single quotes so ``value`` can be placed inside a '...' literal.

    Backslashes are left untouched on purpose: fix_specific_records() passes a
    value containing a literal backslash-n (presumably so the server stores a
    newline - confirm against the target sql_mode).
    """
    return value.replace("'", "''")


def run_sql(sql):
    """Execute SQL command via docker.

    Returns True when the client exits with status 0, False otherwise.
    Any result rows the client prints are echoed to stdout, so SELECT
    statements (see verify()) actually show their output.
    """
    cmd = [
        'docker', 'exec', CONTAINER,
        'mariadb',
        '-u', DB_USER,
        f'-p{DB_PASS}',
        '--default-character-set=utf8mb4',
        DB_NAME,
        '-e', sql
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
    except Exception as e:
        print(f"Exception: {e}", file=sys.stderr)
        return False

    if result.returncode != 0:
        print(f"Error executing SQL: {result.stderr}", file=sys.stderr)
        return False
    if result.stdout and result.stdout.strip():
        print(result.stdout.rstrip())
    return True


def fix_table_column(table, column, patterns):
    """Fix corruption patterns in a specific table column.

    Runs one guarded UPDATE per (corrupted, correct) pair in ``patterns``.
    ``table`` and ``column`` are interpolated as identifiers and must come
    from trusted code, never from user input.

    Returns True when every statement succeeded.
    """
    print(f"Fixing {table}.{column}...")

    failures = 0
    for corrupted, correct in patterns.items():
        # Escape single quotes for SQL
        corrupted_esc = _sql_escape(corrupted)
        correct_esc = _sql_escape(correct)

        sql = f"""
            UPDATE {table}
            SET {column} = REPLACE({column}, '{corrupted_esc}', '{correct_esc}')
            WHERE {column} LIKE CONCAT('%', '{corrupted_esc}', '%');
        """

        if not run_sql(sql):
            failures += 1
            print(f"  Warning: Failed to fix pattern in {table}.{column}")

    if failures:
        print(f"  ✗ {table}.{column}: {failures} pattern(s) failed")
    else:
        print(f"  ✓ {table}.{column} patterns fixed")
    return failures == 0


def fix_html_entities():
    """Fix HTML entities that shouldn't be in the database.

    Applies to products.title, products.description and
    products.item_description. Returns True when every statement succeeded.
    """
    print("\nFixing HTML entities in products table...")

    # NOTE(review): the keys had been reduced to the characters they decode to
    # (making every replacement a no-op); the entity names are reconstructed
    # from the replacement values - confirm against the stored data.
    # '&amp;' is last so that text like '&amp;deg;' is only unescaped once.
    entities = {
        '&nbsp;': ' ',
        '&ndash;': '\u2013',
        '&mdash;': '\u2014',
        '&rdquo;': '"',
        '&ldquo;': '"',
        '&rsquo;': "'",
        '&lsquo;': "'",
        '&deg;': '\u00b0',
        '&frac12;': '\u00bd',
        '&frac14;': '\u00bc',
        '&frac34;': '\u00be',
        '&times;': '\u00d7',
        '&amp;': '&',
    }

    failures = 0
    for table, column in [('products', 'title'), ('products', 'description'), ('products', 'item_description')]:
        for entity, char in entities.items():
            entity_esc = _sql_escape(entity)
            char_esc = _sql_escape(char)  # the apostrophe replacement needs quoting
            sql = f"""
                UPDATE {table}
                SET {column} = REPLACE({column}, '{entity_esc}', '{char_esc}')
                WHERE {column} LIKE '%{entity_esc}%';
            """
            if not run_sql(sql):
                failures += 1

    if failures:
        print(f"  ✗ {failures} HTML entity statement(s) failed")
    else:
        print("  ✓ HTML entities fixed")
    return failures == 0


def fix_specific_records():
    """Fix specific known corrupted records.

    Overwrites one column of one row (matched by ``id``) with a hand-corrected
    value. Returns True when every statement succeeded.
    """
    print("\nFixing specific known records...")

    # (table, id, column, corrected value)
    fixes = [
        ("customers", 253, "name", "SOLUZÉ CIVIL ENGINEERS Trading Under SOLUZE PTY LTD"),
        ("customers", 1006, "name", "Dr. Prem's Molecules Private Limited (DPMolecules)"),
        ("customers", 1387, "name", 'DEE ENTERPRISES (QLD) PTY LTD trading as "MEKLEK"'),
        ("customers", 1608, "name", "Guidera O'Connor Pty Ltd"),
        ("customers", 2174, "name", "Ingredion ANZ Pty Ltd"),
        ("customers", 2215, "name", "Vale Nouvelle-Calédonie S.A.S"),
        ("customers", 2375, "name", "Evaluación y Control Ambiental S.A.S."),
        ("customers", 3143, "name", "Zontahevy Comércio e Serviços Offshore Ltda"),
        ("customers", 3529, "name", "Société des Mines de Syama"),
        ("customers", 4325, "name", "Rambøll Danmark"),
        ("customers", 4350, "name", "SONNIVA ENERGY MAKİNA İNŞAAT İTH. İHR. LTD. ŞTİ."),
        ("customers", 4669, "name", "F.C.C. Fluides Conseils Calédonie SARL"),
        ("customers", 4743, "name", "DGPack Prostějov"),
        ("customers", 4893, "name", "Oxiquímica, S.A.P.I. de C.V."),

        ("principles", 2, "address", "Heinz-Fangman-Straße 18"),
        ("principles", 9, "address", "Bezručova 2901\\n756 61 Rožnov pod Radhoštěm"),
        ("principles", 9, "city", "Rožnov pod Radhoštěm"),
        ("principles", 13, "name", "IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH"),
        ("principles", 14, "address", "Liebigstraße 2"),
        ("principles", 58, "name", "Nosia S.r.l.- Señalización Industrial"),
        ("principles", 65, "address", "Alte Emser Straße 32"),

        ("addresses", 19, "address", "Lvl 3, Building B, 7–11 Talavera Road"),

        ("products", 30, "title", "C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. With CINCH connector, 1m Cable"),
        ("products", 38, "title", "Mid-West Instrument Model 240 – SC – 02 – O(TT)"),
        ("products", 67, "title", "Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp"),
        ("products", 76, "title", "Newson Gale Earth-Rite® RTR Tester - ER2/CRT"),
        ("products", 85, "title", "Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable"),
    ]

    failures = 0
    for table, record_id, column, value in fixes:
        value_esc = _sql_escape(value)
        sql = f"UPDATE {table} SET {column} = '{value_esc}' WHERE id = {record_id};"
        if not run_sql(sql):
            failures += 1

    if failures:
        print(f"  ✗ {failures} of {len(fixes)} specific records failed")
    else:
        print(f"  ✓ {len(fixes)} specific records fixed")
    return failures == 0


def verify():
    """Verify the fixes.

    The REGEXP '[^ -~]' matches any character outside printable ASCII, so the
    counts include rows that now hold correctly repaired accented text; treat
    them as "rows to eyeball", not as a strict corruption count.
    """
    print("\n" + "="*60)
    print("Verification Results")
    print("="*60)

    run_sql("SELECT 'Remaining corrupted customers:', COUNT(*) FROM customers WHERE name REGEXP '[^ -~]';")
    run_sql("SELECT 'Remaining corrupted principles:', COUNT(*) FROM principles WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]';")
    run_sql("SELECT 'Remaining corrupted addresses:', COUNT(*) FROM addresses WHERE address REGEXP '[^ -~]';")
    run_sql("SELECT 'Remaining corrupted products:', COUNT(*) FROM products WHERE title REGEXP '[^ -~]' OR description REGEXP '[^ -~]';")


def main():
    print("="*60)
    print("CMC Database Character Corruption Fix")
    print("="*60)
    print("")

    # Set charset
    run_sql("SET NAMES utf8mb4;")

    # Fix pattern-based corruption
    fix_table_column("customers", "name", CORRUPTION_PATTERNS)
    fix_table_column("principles", "name", CORRUPTION_PATTERNS)
    fix_table_column("principles", "address", CORRUPTION_PATTERNS)
    fix_table_column("principles", "city", CORRUPTION_PATTERNS)
    fix_table_column("addresses", "address", CORRUPTION_PATTERNS)
    fix_table_column("addresses", "city", CORRUPTION_PATTERNS)
    fix_table_column("products", "title", CORRUPTION_PATTERNS)
fix_table_column("products", "description", CORRUPTION_PATTERNS) + fix_table_column("products", "item_description", CORRUPTION_PATTERNS) + + # Fix HTML entities + fix_html_entities() + + # Fix specific records + fix_specific_records() + + # Verify + verify() + + print("\n" + "="*60) + print("Fix complete!") + print("="*60) + +if __name__ == "__main__": + main() diff --git a/scripts/fix_corrupted_data.sql b/scripts/fix_corrupted_data.sql index 2a9894ae..43c9fa83 100644 --- a/scripts/fix_corrupted_data.sql +++ b/scripts/fix_corrupted_data.sql @@ -218,7 +218,67 @@ SET address = 'Lvl 3, Building B, 7–11 Talavera Road' WHERE id = 19; -- ============================================================================ --- PART 3: Verification Queries +-- PART 3: Fix PRODUCTS Table (12,536+ corrupted records) +-- ============================================================================ +-- Products table has extensive corruption from mojibake and HTML entities + +-- Degree symbol: ° → ° +UPDATE products SET title = REPLACE(title, '°', '°') WHERE title LIKE '%°%'; +UPDATE products SET description = REPLACE(description, '°', '°') WHERE description LIKE '%°%'; +UPDATE products SET item_description = REPLACE(item_description, '°', '°') WHERE item_description LIKE '%°%'; + +-- Registered trademark: ® → ® +UPDATE products SET title = REPLACE(title, '®', '®') WHERE title LIKE '%®%'; +UPDATE products SET description = REPLACE(description, '®', '®') WHERE description LIKE '%®%'; +UPDATE products SET item_description = REPLACE(item_description, '®', '®') WHERE item_description LIKE '%®%'; + +-- Trademark: â„¢ → ™ +UPDATE products SET title = REPLACE(title, 'â„¢', '™') WHERE title LIKE '%â„¢%'; +UPDATE products SET description = REPLACE(description, 'â„¢', '™') WHERE description LIKE '%â„¢%'; +UPDATE products SET item_description = REPLACE(item_description, 'â„¢', '™') WHERE item_description LIKE '%â„¢%'; + +-- Smart quotes and dashes in products +UPDATE products SET title = 
REPLACE(title, _utf8mb4 0xC3A2E282ACE284A2, '''') WHERE title LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE284A2, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC3A2E282ACE284A2, '''') WHERE description LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE284A2, '%');
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC3A2E282ACC593, '"') WHERE title LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACC593, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC3A2E282ACC593, '"') WHERE description LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACC593, '%');
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC3A2E282ACC29D, '"') WHERE title LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACC29D, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC3A2E282ACC29D, '"') WHERE description LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACC29D, '%');
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC3A2E282ACE2809C, '–') WHERE title LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE2809C, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC3A2E282ACE2809C, '–') WHERE description LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE2809C, '%');
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC3A2E282ACE2809D, '—') WHERE title LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE2809D, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC3A2E282ACE2809D, '—') WHERE description LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE2809D, '%');
+
+-- Accented characters in products (patterns are the UTF-8 bytes of the mojibake text: C3 83 C2 A9 → é, C3 83 C2 BC → ü)
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC383C2A9, 'é') WHERE title LIKE CONCAT('%', _utf8mb4 0xC383C2A9, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC383C2A9, 'é') WHERE description LIKE CONCAT('%', _utf8mb4 0xC383C2A9, '%');
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC383C2BC, 'ü') WHERE title LIKE CONCAT('%', _utf8mb4 0xC383C2BC, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC383C2BC, 'ü') WHERE description LIKE CONCAT('%', _utf8mb4 0xC383C2BC, '%');
+
+-- HTML entities (shouldn't be in database); names are split with CONCAT so entity-decoding tools cannot alter them, and amp runs last
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'ndash;'), '–') WHERE title LIKE CONCAT('%&', 'ndash;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'ndash;'), '–') WHERE description LIKE CONCAT('%&', 'ndash;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'rdquo;'), '"') WHERE title LIKE CONCAT('%&', 'rdquo;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'rdquo;'), '"') WHERE description LIKE CONCAT('%&', 'rdquo;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'ldquo;'), '"') WHERE title LIKE CONCAT('%&', 'ldquo;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'ldquo;'), '"') WHERE description LIKE CONCAT('%&', 'ldquo;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'frac12;'), '½') WHERE title LIKE CONCAT('%&', 'frac12;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'frac12;'), '½') WHERE description LIKE CONCAT('%&', 'frac12;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'deg;'), '°') WHERE title LIKE CONCAT('%&', 'deg;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'deg;'), '°') WHERE description LIKE CONCAT('%&', 'deg;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'nbsp;'), ' ') WHERE title LIKE CONCAT('%&', 'nbsp;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'nbsp;'), ' ') WHERE description LIKE CONCAT('%&', 'nbsp;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'amp;'), '&') WHERE title LIKE CONCAT('%&', 'amp;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'amp;'), '&') WHERE description LIKE CONCAT('%&', 'amp;%');
+
+-- Specific product fixes
+UPDATE products SET title = 'C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. With CINCH connector, 1m Cable' WHERE id = 30;
+UPDATE products SET title = 'Mid-West Instrument Model 240 – SC – 02 – O(TT)' WHERE id = 38;
+UPDATE products SET title = 'Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp' WHERE id = 67;
+UPDATE products SET title = 'Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable' WHERE id = 85;
+
+-- ============================================================================
+-- PART 4: Verification Queries
 -- ============================================================================
 -- Run these after the fixes to verify they worked
 
@@ -244,6 +304,13 @@ SELECT COUNT(*) as remaining_corrupted_addresses
 FROM addresses
 WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
 
+SELECT COUNT(*) as remaining_corrupted_products
+FROM products
+WHERE title REGEXP '[^ -~]' OR description REGEXP '[^ -~]' OR item_description REGEXP '[^ -~]';
+
+-- Check sample fixed products
+SELECT id, title FROM products WHERE id IN (30, 38, 67, 85);
+
 -- ============================================================================
 -- COMPLETION
 -- ============================================================================
diff --git
a/scripts/fix_corrupted_data_hex.sql b/scripts/fix_corrupted_data_hex.sql
new file mode 100644
index 00000000..c3e9a773
--- /dev/null
+++ b/scripts/fix_corrupted_data_hex.sql
@@ -0,0 +1,226 @@
+-- ============================================================================
+-- Fix Corrupted Character Data in CMC Database (Using HEX encoding)
+-- ============================================================================
+-- This script fixes mojibake using HEX encoding to avoid UTF-8 interpretation issues
+-- Run after converting tables to utf8mb4
+--
+-- Every search pattern is the UTF-8 encoding of the mojibake text itself:
+-- the characters that appear when the original UTF-8 bytes are read as
+-- latin1/cp1252 (for example, U+00E9 read that way is stored as C3 83 C2 A9).
+-- The _utf8mb4 introducer keeps REPLACE() and LIKE in the column character set.
+-- NOTE(review): confirm the byte sequences with SELECT HEX(name) on a known
+-- corrupted row before running this against production data.
+--
+-- IMPORTANT: Review this script and test on a backup first!
+-- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_corrupted_data_hex.sql
+-- ============================================================================
+
+SET NAMES utf8mb4;
+
+-- ============================================================================
+-- PART 1: Fix CUSTOMERS table using HEX encoding
+-- ============================================================================
+
+-- Smart apostrophe (U+2019) → '
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC3A2E282ACE284A2, '''') WHERE name LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE284A2, '%');
+
+-- Left double quote (U+201C) → "
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC3A2E282ACC593, '"') WHERE name LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACC593, '%');
+
+-- Right double quote (U+201D) → "
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC3A2E282ACC29D, '"') WHERE name LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACC29D, '%');
+
+-- En dash (U+2013) → –
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC3A2E282ACE2809C, '–') WHERE name LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE2809C, '%');
+
+-- Em dash (U+2014) → —
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC3A2E282ACE2809D, '—') WHERE name LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE2809D, '%');
+
+-- Zero-width space (U+200B) → (remove)
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC3A2E282ACE280B9, '') WHERE name LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE280B9, '%');
+
+-- Em space (U+2003) → (regular space)
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC3A2E282ACC692, ' ') WHERE name LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACC692, '%');
+
+-- Accented é
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383C2A9, 'é') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2A9, '%');
+
+-- Accented É
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383E280B0, 'É') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383E280B0, '%');
+
+-- Accented ó
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383C2B3, 'ó') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2B3, '%');
+
+-- Accented í
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383C2AD, 'í') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2AD, '%');
+
+-- Accented ç
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383C2A7, 'ç') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2A7, '%');
+
+-- Accented ü
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383C2BC, 'ü') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2BC, '%');
+
+-- Accented á
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383C2A1, 'á') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2A1, '%');
+
+-- Accented ñ
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383C2B1, 'ñ') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2B1, '%');
+
+-- Accented ö
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383C2B6, 'ö') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2B6, '%');
+
+-- Accented ô
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC383C2B4, 'ô') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2B4, '%');
+
+-- Turkish İ
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC384C2B0, 'İ') WHERE name LIKE CONCAT('%', _utf8mb4 0xC384C2B0, '%');
+
+-- Turkish Ş
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC385C5BE, 'Ş') WHERE name LIKE CONCAT('%', _utf8mb4 0xC385C5BE, '%');
+
+-- Turkish Ğ
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC384C5BE, 'Ğ') WHERE name LIKE CONCAT('%', _utf8mb4 0xC384C5BE, '%');
+
+-- Czech Č
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC384C592, 'Č') WHERE name LIKE CONCAT('%', _utf8mb4 0xC384C592, '%');
+
+-- Czech ě
+UPDATE customers SET name = REPLACE(name, _utf8mb4 0xC384E280BA, 'ě') WHERE name LIKE CONCAT('%', _utf8mb4 0xC384E280BA, '%');
+
+-- Remove trailing tabs
+UPDATE customers SET name = TRIM(TRAILING '\t' FROM name) WHERE name LIKE '%\t';
+UPDATE customers SET trading_name = TRIM(TRAILING '\t' FROM trading_name) WHERE trading_name LIKE '%\t';
+
+-- ============================================================================
+-- PART 2: Fix PRINCIPLES table
+-- ============================================================================
+
+-- Accented ü
+UPDATE principles SET name = REPLACE(name, _utf8mb4 0xC383C2BC, 'ü') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2BC, '%');
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC383C2BC, 'ü') WHERE address LIKE CONCAT('%', _utf8mb4 0xC383C2BC, '%');
+UPDATE principles SET city = REPLACE(city, _utf8mb4 0xC383C2BC, 'ü') WHERE city LIKE CONCAT('%', _utf8mb4 0xC383C2BC, '%');
+
+-- Accented é
+UPDATE principles SET name = REPLACE(name, _utf8mb4 0xC383C2A9, 'é') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2A9, '%');
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC383C2A9, 'é') WHERE address LIKE CONCAT('%', _utf8mb4 0xC383C2A9, '%');
+UPDATE principles SET city = REPLACE(city, _utf8mb4 0xC383C2A9, 'é') WHERE city LIKE CONCAT('%', _utf8mb4 0xC383C2A9, '%');
+
+-- Accented ó
+UPDATE principles SET name = REPLACE(name, _utf8mb4 0xC383C2B3, 'ó') WHERE name LIKE CONCAT('%', _utf8mb4 0xC383C2B3, '%');
+UPDATE principles SET city = REPLACE(city, _utf8mb4 0xC383C2B3, 'ó') WHERE city LIKE CONCAT('%', _utf8mb4 0xC383C2B3, '%');
+
+-- German ß
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC383C5B8, 'ß') WHERE address LIKE CONCAT('%', _utf8mb4 0xC383C5B8, '%');
+
+-- Accented ñ
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC383C2B1, 'ñ') WHERE address LIKE CONCAT('%', _utf8mb4 0xC383C2B1, '%');
+
+-- Czech č
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC384C28D, 'č') WHERE address LIKE CONCAT('%', _utf8mb4 0xC384C28D, '%');
+
+-- Czech ž
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC385C2BE, 'ž') WHERE address LIKE CONCAT('%', _utf8mb4 0xC385C2BE, '%');
+
+-- Czech š
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC385C2A1, 'š') WHERE address LIKE CONCAT('%', _utf8mb4 0xC385C2A1, '%');
+
+-- Czech ě
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC384E280BA, 'ě') WHERE address LIKE CONCAT('%', _utf8mb4 0xC384E280BA, '%');
+
+-- Czech ň
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC385CB86, 'ň') WHERE address LIKE CONCAT('%', _utf8mb4 0xC385CB86, '%');
+
+-- Czech ř
+UPDATE principles SET address = REPLACE(address, _utf8mb4 0xC385E284A2, 'ř') WHERE address LIKE CONCAT('%', _utf8mb4 0xC385E284A2, '%');
+
+-- Specific principle fixes
+UPDATE principles SET address = 'Heinz-Fangman-Straße 18' WHERE id = 2;
+UPDATE principles SET address = 'Bezručova 2901\n756 61 Rožnov pod Radhoštěm', city = 'Rožnov pod Radhoštěm' WHERE id = 9;
+UPDATE principles SET name = 'IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH' WHERE id = 13;
+UPDATE principles SET address = 'Liebigstraße 2' WHERE id = 14;
+UPDATE principles SET name = 'Nosia S.r.l.- Señalización Industrial' WHERE id = 58;
+UPDATE principles SET address = 'Alte Emser Straße 32' WHERE id = 65;
+
+-- ============================================================================
+-- PART 3: Fix ADDRESSES table
+-- ============================================================================
+
+-- En dash (U+2013) → –
+UPDATE addresses SET address = REPLACE(address, _utf8mb4 0xC3A2E282ACE2809C, '–') WHERE address LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE2809C, '%');
+
+-- Accented é
+UPDATE addresses SET address = REPLACE(address, _utf8mb4 0xC383C2A9, 'é') WHERE address LIKE CONCAT('%', _utf8mb4 0xC383C2A9, '%');
+UPDATE addresses SET city = REPLACE(city, _utf8mb4 0xC383C2A9, 'é') WHERE city LIKE CONCAT('%', _utf8mb4 0xC383C2A9, '%');
+
+-- Specific address fix
+UPDATE addresses SET address = 'Lvl 3, Building B, 7–11 Talavera Road' WHERE id = 19;
+
+-- ============================================================================
+-- PART 4: Fix PRODUCTS table (using simple text for known values)
+-- ============================================================================
+
+-- Degree symbol (U+00B0) → °
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC382C2B0, '°') WHERE title LIKE CONCAT('%', _utf8mb4 0xC382C2B0, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC382C2B0, '°') WHERE description LIKE CONCAT('%', _utf8mb4 0xC382C2B0, '%');
+UPDATE products SET item_description = REPLACE(item_description, _utf8mb4 0xC382C2B0, '°') WHERE item_description LIKE CONCAT('%', _utf8mb4 0xC382C2B0, '%');
+
+-- Registered trademark (U+00AE) → ®
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC382C2AE, '®') WHERE title LIKE CONCAT('%', _utf8mb4 0xC382C2AE, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC382C2AE, '®') WHERE description LIKE CONCAT('%', _utf8mb4 0xC382C2AE, '%');
+UPDATE products SET item_description = REPLACE(item_description, _utf8mb4 0xC382C2AE, '®') WHERE item_description LIKE CONCAT('%', _utf8mb4 0xC382C2AE, '%');
+
+-- Trademark (U+2122) → ™
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC3A2E2809EC2A2, '™') WHERE title LIKE CONCAT('%', _utf8mb4 0xC3A2E2809EC2A2, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC3A2E2809EC2A2, '™') WHERE description LIKE CONCAT('%', _utf8mb4 0xC3A2E2809EC2A2, '%');
+
+-- Smart apostrophe (U+2019) → '
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC3A2E282ACE284A2, '''') WHERE title LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE284A2, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC3A2E282ACE284A2, '''') WHERE description LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE284A2, '%');
+
+-- En dash (U+2013) → –
+UPDATE products SET title = REPLACE(title, _utf8mb4 0xC3A2E282ACE2809C, '–') WHERE title LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE2809C, '%');
+UPDATE products SET description = REPLACE(description, _utf8mb4 0xC3A2E282ACE2809C, '–') WHERE description LIKE CONCAT('%', _utf8mb4 0xC3A2E282ACE2809C, '%');
+
+-- HTML entities (plain ASCII; names are split with CONCAT so entity-decoding tools cannot alter them, and amp runs last)
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'ndash;'), '–') WHERE title LIKE CONCAT('%&', 'ndash;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'ndash;'), '–') WHERE description LIKE CONCAT('%&', 'ndash;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'rdquo;'), '"') WHERE title LIKE CONCAT('%&', 'rdquo;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'rdquo;'), '"') WHERE description LIKE CONCAT('%&', 'rdquo;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'ldquo;'), '"') WHERE title LIKE CONCAT('%&', 'ldquo;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'ldquo;'), '"') WHERE description LIKE CONCAT('%&', 'ldquo;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'frac12;'), '½') WHERE title LIKE CONCAT('%&', 'frac12;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'frac12;'), '½') WHERE description LIKE CONCAT('%&', 'frac12;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'deg;'), '°') WHERE title LIKE CONCAT('%&', 'deg;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'deg;'), '°') WHERE description LIKE CONCAT('%&', 'deg;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'nbsp;'), ' ') WHERE title LIKE CONCAT('%&', 'nbsp;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'nbsp;'), ' ') WHERE description LIKE CONCAT('%&', 'nbsp;%');
+UPDATE products SET title = REPLACE(title, CONCAT('&', 'amp;'), '&') WHERE title LIKE CONCAT('%&', 'amp;%');
+UPDATE products SET description = REPLACE(description, CONCAT('&', 'amp;'), '&') WHERE description LIKE CONCAT('%&', 'amp;%');
+
+-- Specific product fixes
+UPDATE products SET title = 'C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. With CINCH connector, 1m Cable' WHERE id = 30;
+UPDATE products SET title = 'Mid-West Instrument Model 240 – SC – 02 – O(TT)' WHERE id = 38;
+UPDATE products SET title = 'Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp' WHERE id = 67;
+UPDATE products SET title = 'Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable' WHERE id = 85;
+
+-- ============================================================================
+-- Verification
+-- ============================================================================
+
+SELECT '===========================================================' as '';
+SELECT 'FIX COMPLETE - Verification Results' as '';
+SELECT '===========================================================' as '';
+
+SELECT 'Remaining corrupted principles:' as '', COUNT(*) as count
+FROM principles WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
+
+SELECT 'Remaining corrupted customers:' as '', COUNT(*) as count
+FROM customers WHERE name REGEXP '[^ -~]';
+
+SELECT 'Remaining corrupted addresses:' as '', COUNT(*) as count
+FROM addresses WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
+
+SELECT 'Remaining corrupted products:' as '', COUNT(*) as count
+FROM products WHERE title REGEXP '[^ -~]' OR description REGEXP '[^ -~]' OR item_description REGEXP '[^ -~]';
+
+SELECT 'Fix script completed!' as status;
diff --git a/scripts/fix_corrupted_data_safe.sh b/scripts/fix_corrupted_data_safe.sh
new file mode 100755
index 00000000..42387b5f
--- /dev/null
+++ b/scripts/fix_corrupted_data_safe.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# ============================================================================
+# Fix Corrupted Character Data - Safe Version
+# ============================================================================
+# This script fixes mojibake by generating SQL dynamically to avoid encoding issues
+# ============================================================================
+
+DB_USER="${CMC_DB_USER:-root}"
+DB_PASS="${CMC_DB_PASS:-secureRootPassword}"
+DB_NAME="${CMC_DB_NAME:-cmc}"
+CONTAINER="${CMC_DB_CONTAINER:-cmc-db}"
+
+# Function to run SQL: executes "$1" in a fresh client session inside the container
+run_sql() {
+    docker exec "$CONTAINER" mariadb -u "$DB_USER" "-p$DB_PASS" --default-character-set=utf8mb4 "$DB_NAME" -e "$1"
+}
+
+echo "============================================================"
+echo "Fixing Character Corruption - Safe Version"
+echo "============================================================"
+echo ""
+
+# Charset: every run_sql call is its own client session, so a separate SET NAMES
+# statement would not persist; --default-character-set in run_sql covers it.
+
+echo "Fixing CUSTOMERS table..."
+# Fix customers - specific known records
+run_sql "UPDATE customers SET name = 'SOLUZÉ CIVIL ENGINEERS Trading Under SOLUZE PTY LTD' WHERE id = 253;"
+run_sql "UPDATE customers SET name = 'Dr. Prem''s Molecules Private Limited (DPMolecules)' WHERE id = 1006;"
+run_sql "UPDATE customers SET name = 'DEE ENTERPRISES (QLD) PTY LTD trading as \"MEKLEK\"' WHERE id = 1387;"
+run_sql "UPDATE customers SET name = 'Guidera O''Connor Pty Ltd' WHERE id = 1608;"
+run_sql "UPDATE customers SET name = 'Ingredion ANZ Pty Ltd' WHERE id = 2174;"
+run_sql "UPDATE customers SET name = 'Vale Nouvelle-Calédonie S.A.S' WHERE id = 2215;"
+run_sql "UPDATE customers SET name = 'Evaluación y Control Ambiental S.A.S.'
WHERE id = 2375;" +run_sql "UPDATE customers SET name = 'Zontahevy Comércio e Serviços Offshore Ltda' WHERE id = 3143;" +run_sql "UPDATE customers SET name = 'Société des Mines de Syama' WHERE id = 3529;" +run_sql "UPDATE customers SET name = 'P.J. Berriman & Co PTY LTD' WHERE id = 3633;" +run_sql "UPDATE customers SET name = 'Rambøll Danmark' WHERE id = 4325;" +run_sql "UPDATE customers SET name = 'SONNIVA ENERGY MAKİNA İNŞAAT İTH. İHR. LTD. ŞTİ.' WHERE id = 4350;" +run_sql "UPDATE customers SET name = 'F.C.C. Fluides Conseils Calédonie SARL' WHERE id = 4669;" +run_sql "UPDATE customers SET name = 'DGPack Prostějov' WHERE id = 4743;" +run_sql "UPDATE customers SET name = 'Mccready Welding Pty Ltd Trading under the entity Mccready''s Welding Services' WHERE id = 4764;" +run_sql "UPDATE customers SET name = 'Oxiquímica, S.A.P.I. de C.V.' WHERE id = 4893;" + +echo "Fixing PRINCIPLES table..." +run_sql "UPDATE principles SET address = 'Heinz-Fangman-Straße 18' WHERE id = 2;" +run_sql "UPDATE principles SET address = 'Bezručova 2901\n756 61 Rožnov pod Radhoštěm', city = 'Rožnov pod Radhoštěm' WHERE id = 9;" +run_sql "UPDATE principles SET name = 'IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH' WHERE id = 13;" +run_sql "UPDATE principles SET address = 'Liebigstraße 2' WHERE id = 14;" +run_sql "UPDATE principles SET name = 'Nosia S.r.l.- Señalización Industrial' WHERE id = 58;" +run_sql "UPDATE principles SET address = 'Alte Emser Straße 32' WHERE id = 65;" + +echo "Fixing ADDRESSES table..." +run_sql "UPDATE addresses SET address = 'Lvl 3, Building B, 7–11 Talavera Road' WHERE id = 19;" + +echo "Fixing PRODUCTS table (specific records)..." +run_sql "UPDATE products SET title = 'C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. 
With CINCH connector, 1m Cable' WHERE id = 30;" +run_sql "UPDATE products SET title = 'Mid-West Instrument Model 240 – SC – 02 – O(TT)' WHERE id = 38;" +run_sql "UPDATE products SET title = 'Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp' WHERE id = 67;" +run_sql "UPDATE products SET title = 'Newson Gale Earth-Rite® RTR Tester - ER2/CRT' WHERE id = 76;" +run_sql "UPDATE products SET title = 'Newson Gale Earth-Rite® Installers Kit A for IIB areas only (Metal Enclosures Only)' WHERE id = 77;" +run_sql "UPDATE products SET title = 'Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable' WHERE id = 85;" +run_sql "UPDATE products SET title = 'Newson Gale Earth-Rite® RTR System Spares - X90-IP Heavy Duty 2 Core Clamp with 10m 2 Core Spiral Cable and Male Quick Connect' WHERE id = 86;" +run_sql "UPDATE products SET title = 'Newson Gale Bond-Rite® EZ, VESX90-IP Heavy Duty Clamp & 3m 2 Core Spiral Cable' WHERE id = 149;" + +echo "" +echo "Fixing HTML entities in products..." 
+# These are safe ASCII characters, so no encoding issues +run_sql "UPDATE products SET title = REPLACE(title, '–', '–') WHERE title LIKE '%–%';" +run_sql "UPDATE products SET description = REPLACE(description, '–', '–') WHERE description LIKE '%–%';" +run_sql "UPDATE products SET title = REPLACE(title, '&', '&') WHERE title LIKE '%&%';" +run_sql "UPDATE products SET description = REPLACE(description, '&', '&') WHERE description LIKE '%&%';" +run_sql "UPDATE products SET title = REPLACE(title, ' ', ' ') WHERE title LIKE '% %';" +run_sql "UPDATE products SET description = REPLACE(description, ' ', ' ') WHERE description LIKE '% %';" +run_sql "UPDATE products SET title = REPLACE(title, '°', '°') WHERE title LIKE '%°%';" +run_sql "UPDATE products SET description = REPLACE(description, '°', '°') WHERE description LIKE '%°%';" +run_sql "UPDATE products SET title = REPLACE(title, '½', '½') WHERE title LIKE '%½%';" +run_sql "UPDATE products SET description = REPLACE(description, '½', '½') WHERE description LIKE '%½%';" +run_sql "UPDATE products SET title = REPLACE(title, '”', '\"') WHERE title LIKE '%”%';" +run_sql "UPDATE products SET description = REPLACE(description, '”', '\"') WHERE description LIKE '%”%';" +run_sql "UPDATE products SET title = REPLACE(title, '“', '\"') WHERE title LIKE '%“%';" +run_sql "UPDATE products SET description = REPLACE(description, '“', '\"') WHERE description LIKE '%“%';" + +echo "" +echo "============================================================" +echo "Fix complete! 
Verification:" +echo "============================================================" + +run_sql "SELECT 'Remaining corrupted customers:' as status, COUNT(*) as count FROM customers WHERE name REGEXP '[^ -~]';" +run_sql "SELECT 'Remaining corrupted principles:' as status, COUNT(*) as count FROM principles WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]';" +run_sql "SELECT 'Remaining corrupted products:' as status, COUNT(*) as count FROM products WHERE title REGEXP '[^ -~]' OR description REGEXP '[^ -~]';" + +echo "" +echo "Done! Check specific records:" +echo "SELECT id, name FROM customers WHERE id IN (253, 1006, 1387);" +echo "SELECT id, name, city FROM principles WHERE id IN (2, 9, 13);" +echo "SELECT id, title FROM products WHERE id IN (30, 38, 67, 85);" diff --git a/scripts/fix_products_corruption.sql b/scripts/fix_products_corruption.sql new file mode 100644 index 00000000..90ea8049 --- /dev/null +++ b/scripts/fix_products_corruption.sql @@ -0,0 +1,291 @@ +-- ============================================================================ +-- Fix Corrupted Character Data in PRODUCTS Table +-- ============================================================================ +-- This script fixes mojibake in the products table (12,536+ affected records) +-- Also fixes HTML entities that shouldn't be in the raw data +-- +-- IMPORTANT: Review this script and test on a backup first! 
+-- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_products_corruption.sql
+-- ============================================================================
+
+SET NAMES utf8mb4;
+
+SELECT '============================================================' as '';
+SELECT 'PRODUCTS TABLE - CHARACTER CORRUPTION FIX' as '';
+SELECT '============================================================' as '';
+
+-- Count before fix. NOTE: '[^ -~]' matches every character outside printable ASCII (line breaks and valid accented letters included), so this is an upper bound, not an exact count.
+SELECT 'BEFORE FIX - Corrupted records count:' as '';
+SELECT COUNT(*) as corrupted_products
+FROM products
+WHERE title REGEXP '[^ -~]'
+   OR description REGEXP '[^ -~]'
+   OR item_description REGEXP '[^ -~]'
+   OR notes REGEXP '[^ -~]';
+
+-- ============================================================================
+-- PART 1: Fix Mojibake Patterns (UTF-8 corruption)
+-- ============================================================================
+-- Each search string is the Windows-1252 rendering of the target character's UTF-8 bytes (e.g. C2 B0 shows as 'Â°'); search and replacement must differ or the UPDATE is a no-op.
+-- Degree symbol: Â° → °
+UPDATE products SET title = REPLACE(title, 'Â°', '°') WHERE title LIKE '%Â°%';
+UPDATE products SET description = REPLACE(description, 'Â°', '°') WHERE description LIKE '%Â°%';
+UPDATE products SET item_description = REPLACE(item_description, 'Â°', '°') WHERE item_description LIKE '%Â°%';
+UPDATE products SET notes = REPLACE(notes, 'Â°', '°') WHERE notes LIKE '%Â°%';
+
+-- Registered trademark: Â® → ®
+UPDATE products SET title = REPLACE(title, 'Â®', '®') WHERE title LIKE '%Â®%';
+UPDATE products SET description = REPLACE(description, 'Â®', '®') WHERE description LIKE '%Â®%';
+UPDATE products SET item_description = REPLACE(item_description, 'Â®', '®') WHERE item_description LIKE '%Â®%';
+UPDATE products SET notes = REPLACE(notes, 'Â®', '®') WHERE notes LIKE '%Â®%';
+
+-- Trademark: â„¢ → ™
+UPDATE products SET title = REPLACE(title, 'â„¢', '™') WHERE title LIKE '%â„¢%';
+UPDATE products SET description = REPLACE(description, 'â„¢', '™') WHERE description LIKE '%â„¢%';
+UPDATE products SET item_description = REPLACE(item_description, 'â„¢', '™') WHERE item_description LIKE '%â„¢%';
+UPDATE products SET notes = REPLACE(notes, 'â„¢', '™') WHERE notes LIKE '%â„¢%';
+
+-- Smart apostrophe: â€™ → '  (a quote inside a SQL string literal is written doubled, hence '''')
+UPDATE products SET title = REPLACE(title, 'â€™', '''') WHERE title LIKE '%â€™%';
+UPDATE products SET description = REPLACE(description, 'â€™', '''') WHERE description LIKE '%â€™%';
+UPDATE products SET item_description = REPLACE(item_description, 'â€™', '''') WHERE item_description LIKE '%â€™%';
+UPDATE products SET notes = REPLACE(notes, 'â€™', '''') WHERE notes LIKE '%â€™%';
+
+-- Smart left double quote: â€œ → "
+UPDATE products SET title = REPLACE(title, 'â€œ', '"') WHERE title LIKE '%â€œ%';
+UPDATE products SET description = REPLACE(description, 'â€œ', '"') WHERE description LIKE '%â€œ%';
+UPDATE products SET item_description = REPLACE(item_description, 'â€œ', '"') WHERE item_description LIKE '%â€œ%';
+UPDATE products SET notes = REPLACE(notes, 'â€œ', '"') WHERE notes LIKE '%â€œ%';
+
+-- En dash: â€ followed by U+201C (cp1252 glyph of byte 0x93) → –
+UPDATE products SET title = REPLACE(title, 'â€“', '–') WHERE title LIKE '%â€“%';
+UPDATE products SET description = REPLACE(description, 'â€“', '–') WHERE description LIKE '%â€“%';
+UPDATE products SET item_description = REPLACE(item_description, 'â€“', '–') WHERE item_description LIKE '%â€“%';
+UPDATE products SET notes = REPLACE(notes, 'â€“', '–') WHERE notes LIKE '%â€“%';
+
+-- Em dash: â€ followed by U+201D (cp1252 glyph of byte 0x94) → —
+UPDATE products SET title = REPLACE(title, 'â€”', '—') WHERE title LIKE '%â€”%';
+UPDATE products SET description = REPLACE(description, 'â€”', '—') WHERE description LIKE '%â€”%';
+UPDATE products SET item_description = REPLACE(item_description, 'â€”', '—') WHERE item_description LIKE '%â€”%';
+UPDATE products SET notes = REPLACE(notes, 'â€”', '—') WHERE notes LIKE '%â€”%';
+
+-- Ellipsis: â€¦ → …
+UPDATE products SET title = REPLACE(title, 'â€¦', '…') WHERE title LIKE '%â€¦%';
+UPDATE products SET description = REPLACE(description, 'â€¦', '…') WHERE description LIKE '%â€¦%';
+UPDATE products SET item_description = REPLACE(item_description, 'â€¦', '…') WHERE item_description LIKE '%â€¦%';
+UPDATE products SET notes = REPLACE(notes, 'â€¦', '…') WHERE notes LIKE '%â€¦%';
+
+-- Smart right double quote: â€ (+ U+009D) → "  Must run AFTER every other 'â€x' pattern: the bare 'â€' fallback is a prefix of all of them. NOTE(review): byte 0x9D has no cp1252 glyph; presumably stored as U+009D - confirm on real rows.
+UPDATE products SET title = REPLACE(REPLACE(title, CONCAT('â€', CHAR(0xC29D USING utf8mb4)), '"'), 'â€', '"') WHERE title LIKE '%â€%';
+UPDATE products SET description = REPLACE(REPLACE(description, CONCAT('â€', CHAR(0xC29D USING utf8mb4)), '"'), 'â€', '"') WHERE description LIKE '%â€%';
+UPDATE products SET item_description = REPLACE(REPLACE(item_description, CONCAT('â€', CHAR(0xC29D USING utf8mb4)), '"'), 'â€', '"') WHERE item_description LIKE '%â€%';
+UPDATE products SET notes = REPLACE(REPLACE(notes, CONCAT('â€', CHAR(0xC29D USING utf8mb4)), '"'), 'â€', '"') WHERE notes LIKE '%â€%';
+
+-- Accented characters (common in product descriptions)
+-- Ã© → é
+UPDATE products SET title = REPLACE(title, 'Ã©', 'é') WHERE title LIKE '%Ã©%';
+UPDATE products SET description = REPLACE(description, 'Ã©', 'é') WHERE description LIKE '%Ã©%';
+UPDATE products SET item_description = REPLACE(item_description, 'Ã©', 'é') WHERE item_description LIKE '%Ã©%';
+UPDATE products SET notes = REPLACE(notes, 'Ã©', 'é') WHERE notes LIKE '%Ã©%';
+
+-- Ã¼ → ü
+UPDATE products SET title = REPLACE(title, 'Ã¼', 'ü') WHERE title LIKE '%Ã¼%';
+UPDATE products SET description = REPLACE(description, 'Ã¼', 'ü') WHERE description LIKE '%Ã¼%';
+UPDATE products SET item_description = REPLACE(item_description, 'Ã¼', 'ü') WHERE item_description LIKE '%Ã¼%';
+UPDATE products SET notes = REPLACE(notes, 'Ã¼', 'ü') WHERE notes LIKE '%Ã¼%';
+
+-- Ã¶ → ö
+UPDATE products SET title = REPLACE(title, 'Ã¶', 'ö') WHERE title LIKE '%Ã¶%';
+UPDATE products SET description = REPLACE(description, 'Ã¶', 'ö') WHERE description LIKE '%Ã¶%';
+UPDATE products SET item_description = REPLACE(item_description, 'Ã¶', 'ö') WHERE item_description LIKE '%Ã¶%';
+UPDATE products SET notes = REPLACE(notes, 'Ã¶', 'ö') WHERE notes LIKE '%Ã¶%';
+
+-- Ã¤ → ä
+UPDATE products SET title = REPLACE(title, 'Ã¤', 'ä') WHERE title LIKE '%Ã¤%';
+UPDATE products SET description = REPLACE(description, 'Ã¤', 'ä') WHERE description LIKE '%Ã¤%';
+UPDATE products SET item_description = REPLACE(item_description, 'Ã¤', 'ä') WHERE item_description LIKE '%Ã¤%';
+UPDATE products SET notes = REPLACE(notes, 'Ã¤', 'ä') WHERE notes LIKE '%Ã¤%';
+
+-- ÃŸ → ß (German sharp s)
+UPDATE products SET title = REPLACE(title, 'ÃŸ', 'ß') WHERE title LIKE '%ÃŸ%';
+UPDATE products SET description = REPLACE(description, 'ÃŸ', 'ß') WHERE description LIKE '%ÃŸ%';
+UPDATE products SET item_description = REPLACE(item_description, 'ÃŸ', 'ß') WHERE item_description LIKE '%ÃŸ%';
+UPDATE products SET notes = REPLACE(notes, 'ÃŸ', 'ß') WHERE notes LIKE '%ÃŸ%';
+
+-- ============================================================================
+-- PART 2: Fix HTML Entities (shouldn't be in raw database data)
+-- ============================================================================
+-- Note: These are likely from copy-paste from HTML or rich text editors
+
+-- &ndash; → –
+UPDATE products SET title = REPLACE(title, '&ndash;', '–') WHERE title LIKE '%&ndash;%';
+UPDATE products SET description = REPLACE(description, '&ndash;', '–') WHERE description LIKE '%&ndash;%';
+UPDATE products SET item_description = REPLACE(item_description, '&ndash;', '–') WHERE item_description LIKE '%&ndash;%';
+UPDATE products SET notes = REPLACE(notes, '&ndash;', '–') WHERE notes LIKE '%&ndash;%';
+
+-- &mdash; → —
+UPDATE products SET title = REPLACE(title, '&mdash;', '—') WHERE title LIKE '%&mdash;%';
+UPDATE products SET description = REPLACE(description, '&mdash;', '—') WHERE description LIKE '%&mdash;%';
+UPDATE products SET item_description = REPLACE(item_description, '&mdash;', '—') WHERE item_description LIKE '%&mdash;%';
+UPDATE products SET notes = REPLACE(notes, '&mdash;', '—') WHERE notes LIKE '%&mdash;%';
+
+-- &rdquo; → "
+UPDATE products SET title = REPLACE(title, '&rdquo;', '"') WHERE title LIKE '%&rdquo;%';
+UPDATE products SET description = REPLACE(description, '&rdquo;', '"') WHERE description LIKE '%&rdquo;%';
+UPDATE products SET item_description = REPLACE(item_description, '&rdquo;', '"') WHERE item_description LIKE '%&rdquo;%';
+UPDATE products SET notes = REPLACE(notes, '&rdquo;', '"') WHERE notes LIKE '%&rdquo;%';
+
+-- &ldquo; → "
+UPDATE products SET title = REPLACE(title, '&ldquo;', '"') WHERE title LIKE '%&ldquo;%';
+UPDATE products SET description = REPLACE(description, '&ldquo;', '"') WHERE description LIKE '%&ldquo;%';
+UPDATE products SET item_description = REPLACE(item_description, '&ldquo;', '"') WHERE item_description LIKE '%&ldquo;%';
+UPDATE products SET notes = REPLACE(notes, '&ldquo;', '"') WHERE notes LIKE '%&ldquo;%';
+
+-- &rsquo; → '
+UPDATE products SET title = REPLACE(title, '&rsquo;', '''') WHERE title LIKE '%&rsquo;%';
+UPDATE products SET description = REPLACE(description, '&rsquo;', '''') WHERE description LIKE '%&rsquo;%';
+UPDATE products SET item_description = REPLACE(item_description, '&rsquo;', '''') WHERE item_description LIKE '%&rsquo;%';
+UPDATE products SET notes = REPLACE(notes, '&rsquo;', '''') WHERE notes LIKE '%&rsquo;%';
+
+-- &lsquo; → '
+UPDATE products SET title = REPLACE(title, '&lsquo;', '''') WHERE title LIKE '%&lsquo;%';
+UPDATE products SET description = REPLACE(description, '&lsquo;', '''') WHERE description LIKE '%&lsquo;%';
+UPDATE products SET item_description = REPLACE(item_description, '&lsquo;', '''') WHERE item_description LIKE '%&lsquo;%';
+UPDATE products SET notes = REPLACE(notes, '&lsquo;', '''') WHERE notes LIKE '%&lsquo;%';
+
+-- &quot; → " (keep as is, but decode entity)
+UPDATE products SET title = REPLACE(title, '&quot;', '"') WHERE title LIKE '%&quot;%';
+UPDATE products SET description = REPLACE(description, '&quot;', '"') WHERE description LIKE '%&quot;%';
+UPDATE products SET item_description = REPLACE(item_description, '&quot;', '"') WHERE item_description LIKE '%&quot;%';
+UPDATE products SET notes = REPLACE(notes, '&quot;', '"') WHERE notes LIKE '%&quot;%';
+
+-- &amp; → & (ampersand)
+UPDATE products SET title = REPLACE(title, '&amp;', '&') WHERE title LIKE '%&amp;%';
+UPDATE products SET description = REPLACE(description, '&amp;', '&') WHERE description LIKE '%&amp;%';
+UPDATE products SET item_description = REPLACE(item_description, '&amp;', '&') WHERE item_description LIKE '%&amp;%';
+UPDATE products SET notes = REPLACE(notes, '&amp;', '&') WHERE notes LIKE '%&amp;%';
+
+-- Common fractions
+-- &frac12; → ½
+UPDATE products SET title = REPLACE(title, '&frac12;', '½') WHERE title LIKE '%&frac12;%';
+UPDATE products SET description = REPLACE(description, '&frac12;', '½') WHERE description LIKE '%&frac12;%';
+UPDATE products SET item_description = REPLACE(item_description, '&frac12;', '½') WHERE item_description LIKE '%&frac12;%';
+UPDATE products SET notes = REPLACE(notes, '&frac12;', '½') WHERE notes LIKE '%&frac12;%';
+
+-- &frac14; → ¼
+UPDATE products SET title = REPLACE(title, '&frac14;', '¼') WHERE title LIKE '%&frac14;%';
+UPDATE products SET description = REPLACE(description, '&frac14;', '¼') WHERE description LIKE '%&frac14;%';
+UPDATE products SET item_description = REPLACE(item_description, '&frac14;', '¼') WHERE item_description LIKE '%&frac14;%';
+UPDATE products SET notes = REPLACE(notes, '&frac14;', '¼') WHERE notes LIKE '%&frac14;%';
+
+-- &frac34; → ¾
+UPDATE products SET title = REPLACE(title, '&frac34;', '¾') WHERE title LIKE '%&frac34;%';
+UPDATE products SET description = REPLACE(description, '&frac34;', '¾') WHERE description LIKE '%&frac34;%';
+UPDATE products SET item_description = REPLACE(item_description, '&frac34;', '¾') WHERE item_description LIKE '%&frac34;%';
+UPDATE products SET notes = REPLACE(notes, '&frac34;', '¾') WHERE notes LIKE '%&frac34;%';
+
+-- &deg; → °
+UPDATE products SET title = REPLACE(title, '&deg;', '°') WHERE title LIKE '%&deg;%';
+UPDATE products SET description = REPLACE(description, '&deg;', '°') WHERE description LIKE '%&deg;%';
+UPDATE products SET item_description = REPLACE(item_description, '&deg;', '°') WHERE item_description LIKE '%&deg;%';
+UPDATE products SET notes = REPLACE(notes, '&deg;', '°') WHERE notes LIKE '%&deg;%';
+
+-- &times; → ×
+UPDATE products SET title = REPLACE(title, '&times;', '×') WHERE title LIKE '%&times;%';
+UPDATE products SET description = REPLACE(description, '&times;', '×') WHERE description LIKE '%&times;%';
+UPDATE products SET item_description = REPLACE(item_description, '&times;', '×') WHERE item_description LIKE '%&times;%';
+UPDATE products SET notes = REPLACE(notes, '&times;', '×') WHERE notes LIKE '%&times;%';
+
+-- &nbsp; → (space) - non-breaking space
+UPDATE products SET title = REPLACE(title, '&nbsp;', ' ') WHERE title LIKE '%&nbsp;%';
+UPDATE products SET description = REPLACE(description, '&nbsp;', ' ') WHERE description LIKE '%&nbsp;%';
+UPDATE products SET item_description = REPLACE(item_description, '&nbsp;', ' ') WHERE item_description LIKE '%&nbsp;%';
+UPDATE products SET notes = REPLACE(notes, '&nbsp;', ' ') WHERE notes LIKE '%&nbsp;%';
+
+-- ============================================================================
+-- PART 3: Specific Known Corrupted Records
+-- ============================================================================
+
+-- ID 30: Degree symbol in title
+UPDATE products
+SET title = 'C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. With CINCH connector, 1m Cable'
+WHERE id = 30;
+
+-- ID 38: Em dashes in title
+UPDATE products
+SET title = 'Mid-West Instrument Model 240 – SC – 02 – O(TT)'
+WHERE id = 38;
+
+-- ID 67: Registered trademark
+UPDATE products
+SET title = 'Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp'
+WHERE id = 67;
+
+-- ID 76: Registered trademark
+UPDATE products
+SET title = 'Newson Gale Earth-Rite® RTR Tester - ER2/CRT'
+WHERE id = 76;
+
+-- ID 77: Registered trademark
+UPDATE products
+SET title = 'Newson Gale Earth-Rite® Installers Kit A for IIB areas only (Metal Enclosures Only)'
+WHERE id = 77;
+
+-- ID 85: Registered trademark and trademark
+UPDATE products
+SET title = 'Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable'
+WHERE id = 85;
+
+-- ID 86: Registered trademark
+UPDATE products
+SET title = 'Newson Gale Earth-Rite® RTR System Spares - X90-IP Heavy Duty 2 Core Clamp with 10m 2 Core Spiral Cable and Male Quick Connect'
+WHERE id = 86;
+
+-- ID 149: Registered trademark
+UPDATE products
+SET title = 'Newson Gale Bond-Rite® EZ, VESX90-IP Heavy Duty Clamp & 3m 2 Core Spiral Cable'
+WHERE id = 149;
+
+-- ID 151: Registered trademark
+UPDATE products
+SET title = 'UNSURE Newson Gale Bond-Rite® Clamp with 20ft (6.1m) Cable Reel'
+WHERE id = 151;
+
+-- ID 155: Registered trademark
+UPDATE products
+SET title = 'Newson Gale Earth-Rite® MGV Tri-Mode Static Grounding System, GRP Enclosure, X90-IP Heavy Duty Clamp with 15m 2 Core Spiral Cable'
+WHERE id = 155;
+
+-- ID 220: Registered trademark
+UPDATE products
+SET title = 'Newson Gale Earth-Rite® TELLUS II Static Earthing System, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable'
+WHERE id = 220;
+
+-- ID 249: Double registered trademarks
+UPDATE products
+SET title = 'Newson Gale Bond-Rite® Remote GRP Multi-Way Junction Box for 2 to 4 Bond-Rite® Remote EP Units'
+WHERE id = 249;
+
+-- ============================================================================
+-- PART 4: Verification
+-- ============================================================================
+-- NOTE: '[^ -~]' flags every character outside printable ASCII, so rows correctly repaired to ° ® ™ – etc. are still counted below; the number is not expected to reach zero.
+SELECT '' as '';
+SELECT '============================================================' as '';
+SELECT 'AFTER FIX - Corrupted records count:' as '';
+SELECT COUNT(*) as remaining_corrupted_products
+FROM products
+WHERE title REGEXP '[^ -~]'
+   OR description REGEXP '[^ -~]'
+   OR item_description REGEXP '[^ -~]'
+   OR notes REGEXP '[^ -~]';
+
+-- Show sample fixed records
+SELECT '' as '';
+SELECT 'Sample fixed records:' as '';
+SELECT id, title FROM products WHERE id IN (30, 38, 67, 76, 85, 149);
+
+SELECT '' as '';
+SELECT '============================================================' as '';
+SELECT 'PRODUCTS TABLE FIX COMPLETE' as '';
+SELECT '============================================================' as '';
diff --git a/scripts/run_comprehensive_fix.sh b/scripts/run_comprehensive_fix.sh
new file mode 100755
index 00000000..2b76c094
--- /dev/null
+++ b/scripts/run_comprehensive_fix.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# ============================================================================
+# Comprehensive Corruption Fix - All Tables
+# ============================================================================
+# This script:
+#   1. Creates a backup
+#   2. Scans all tables for corruption
+#   3. Prompts for confirmation
+#   4. Applies fixes to ALL tables
+#   5. Shows before/after comparison
+# ============================================================================
+
+set -e  # Exit on error (NOTE: only the LAST command of a pipeline is checked; see the explicit dump check in Step 1)
+
+DB_USER="root"
+DB_PASS="${DB_PASS:-secureRootPassword}"  # may be overridden from the environment instead of editing this file
+DB_NAME="cmc"
+CONTAINER="cmc-db"
+BACKUP_DIR="./backups"
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+echo -e "${BLUE}============================================================${NC}"
+echo -e "${BLUE}CMC Database - Comprehensive Corruption Fix${NC}"
+echo -e "${BLUE}============================================================${NC}"
+echo ""
+
+# Create backups directory
+mkdir -p "$BACKUP_DIR"
+
+# Step 1: Create backup (the dump's own exit status is checked right after the pipeline, because gzip succeeding would otherwise hide a failed dump)
+echo -e "${YELLOW}Step 1: Creating full database backup...${NC}"
+docker exec $CONTAINER mariadb-dump \
+    -u $DB_USER \
+    -p$DB_PASS \
+    --default-character-set=utf8mb4 \
+    --single-transaction \
+    $DB_NAME | gzip > "$BACKUP_DIR/backup_comprehensive_fix_$TIMESTAMP.sql.gz"
+[ "${PIPESTATUS[0]}" -eq 0 ] || { echo -e "${RED}Backup failed (mariadb-dump exited non-zero) - aborting before any data is modified${NC}" >&2; exit 1; }
+BACKUP_SIZE=$(du -h "$BACKUP_DIR/backup_comprehensive_fix_$TIMESTAMP.sql.gz" | cut -f1)
+echo -e "${GREEN}✓ Backup created: backup_comprehensive_fix_$TIMESTAMP.sql.gz (${BACKUP_SIZE})${NC}"
+echo ""
+
+# Step 2: Scan all tables (-i is required: without it docker exec does not pass the redirected SQL file to the client's stdin)
+echo -e "${YELLOW}Step 2: Scanning ALL tables for corruption...${NC}"
+echo "-------------------------------------------"
+docker exec -i $CONTAINER mariadb \
+    -u $DB_USER \
+    -p$DB_PASS \
+    --default-character-set=utf8mb4 \
+    $DB_NAME < scripts/scan_all_tables_for_corruption.sql | head -100
+echo ""
+
+# Step 3: Prompt for confirmation
+echo -e "${RED}IMPORTANT:${NC}"
+echo "- Backup created at: $BACKUP_DIR/backup_comprehensive_fix_$TIMESTAMP.sql.gz"
+echo "- This will modify ALL text columns in ALL tables"
+echo "- Fixes mojibake patterns like: â€™ → ', Ã© → é, Â° → °, etc."
+echo "- Tables affected: customers, principles, addresses, products, contacts,"
+echo " emails, invoices, jobs, quotes, purchase_orders, shipments, etc."
+echo ""
+read -r -p "Do you want to proceed with the comprehensive fix? (yes/no): " CONFIRM
+
+if [ "$CONFIRM" != "yes" ]; then
+    echo -e "${RED}Fix cancelled by user${NC}"
+    exit 0
+fi
+
+# Step 4: Apply comprehensive fixes
+echo ""
+echo -e "${YELLOW}Step 4: Applying comprehensive corruption fixes...${NC}"
+docker exec -i $CONTAINER mariadb \
+    -u $DB_USER \
+    -p$DB_PASS \
+    --default-character-set=utf8mb4 \
+    $DB_NAME < scripts/fix_all_corruption.sql
+
+echo -e "${GREEN}✓ Comprehensive fix executed${NC}"
+echo ""
+
+# Step 5: Re-scan to show results (-i is required so the redirected SQL file reaches the client inside the container)
+echo -e "${YELLOW}Step 5: Re-scanning to verify fixes...${NC}"
+echo "---------------------------------------"
+docker exec -i $CONTAINER mariadb \
+    -u $DB_USER \
+    -p$DB_PASS \
+    --default-character-set=utf8mb4 \
+    $DB_NAME < scripts/scan_all_tables_for_corruption.sql | head -100
+echo ""
+
+# Final summary
+echo -e "${BLUE}============================================================${NC}"
+echo -e "${GREEN}COMPREHENSIVE FIX COMPLETED!${NC}"
+echo -e "${BLUE}============================================================${NC}"
+echo ""
+echo "Summary:"
+echo "- Backup location: $BACKUP_DIR/backup_comprehensive_fix_$TIMESTAMP.sql.gz"
+echo "- Fixed corruption patterns across ALL tables"
+echo "- Common patterns: smart quotes, accented characters, symbols, HTML entities"
+echo ""
+echo "Next steps:"
+echo "1. Review the fixed data in the application"
+echo "2. Test critical functionality (quotes, invoices, emails, etc.)"
+echo "3. Run: bash scripts/report_remaining_corruption.sh if any issues remain"
+echo ""
+echo "To rollback if needed:"
+echo " gunzip < $BACKUP_DIR/backup_comprehensive_fix_$TIMESTAMP.sql.gz | \\"
+echo " docker exec -i $CONTAINER mariadb -u $DB_USER -p$DB_PASS $DB_NAME"
+echo ""
diff --git a/scripts/run_corruption_fix.sh b/scripts/run_corruption_fix.sh
index c2611803..1393132c 100755
--- a/scripts/run_corruption_fix.sh
+++ b/scripts/run_corruption_fix.sh
@@ -66,7 +66,13 @@ UNION ALL
 SELECT 'addresses', COUNT(*)
 FROM addresses
 WHERE address REGEXP '[^ -~]'
- OR city REGEXP '[^ -~]';
+ OR city REGEXP '[^ -~]'
+UNION ALL
+SELECT 'products', COUNT(*)
+FROM products
+WHERE title REGEXP '[^ -~]'
+ OR description REGEXP '[^ -~]'
+ OR item_description REGEXP '[^ -~]';
 "
 echo ""
 
@@ -76,14 +82,16 @@ echo "-------------------------------------------"
 docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS --default-character-set=utf8mb4 $DB_NAME -e "
 SELECT 'CUSTOMERS' as table_name, id, name FROM customers WHERE id IN (253, 1006, 1387, 1608)
 UNION ALL
-SELECT 'PRINCIPLES', id, name FROM principles WHERE id IN (2, 9, 13, 14);
+SELECT 'PRINCIPLES', id, name FROM principles WHERE id IN (2, 9, 13, 14)
+UNION ALL
+SELECT 'PRODUCTS', id, title FROM products WHERE id IN (30, 38, 67);
 "
 echo ""
 
 # Step 4: Prompt for confirmation
 echo -e "${RED}IMPORTANT:${NC}"
 echo "- Backup created at: $BACKUP_DIR/backup_before_corruption_fix_$TIMESTAMP.sql.gz"
-echo "- This will modify data in tables: principles, customers, addresses"
+echo "- This will modify data in tables: principles, customers, addresses, products"
 echo "- Changes fix mojibake like: ’ → ', é → é, etc."
 echo ""
 read -p "Do you want to proceed with the fix? (yes/no): " CONFIRM
@@ -113,7 +121,9 @@ echo "Sample fixed data:"
 docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS --default-character-set=utf8mb4 $DB_NAME -e "
 SELECT 'CUSTOMERS' as table_name, id, name FROM customers WHERE id IN (253, 1006, 1387, 1608)
 UNION ALL
-SELECT 'PRINCIPLES', id, name FROM principles WHERE id IN (2, 9, 13, 14);
+SELECT 'PRINCIPLES', id, name FROM principles WHERE id IN (2, 9, 13, 14)
+UNION ALL
+SELECT 'PRODUCTS', id, title FROM products WHERE id IN (30, 38, 67);
 "
 echo ""
 
@@ -134,7 +144,13 @@ UNION ALL
 SELECT 'addresses', COUNT(*)
 FROM addresses
 WHERE address REGEXP '[^ -~]'
- OR city REGEXP '[^ -~]';
+ OR city REGEXP '[^ -~]'
+UNION ALL
+SELECT 'products', COUNT(*)
+FROM products
+WHERE title REGEXP '[^ -~]'
+ OR description REGEXP '[^ -~]'
+ OR item_description REGEXP '[^ -~]';
 "
 echo ""
 
diff --git a/scripts/scan_all_tables_for_corruption.sql b/scripts/scan_all_tables_for_corruption.sql
new file mode 100644
index 00000000..95d9211d
--- /dev/null
+++ b/scripts/scan_all_tables_for_corruption.sql
@@ -0,0 +1,126 @@
+-- ============================================================================
+-- Scan ALL Tables for Character Corruption
+-- ============================================================================
+-- This script scans a hand-maintained list of tables and text columns for mojibake/corruption
+-- (the list is hard-coded below; new tables or columns must be added manually) by checking them for non-ASCII characters
+--
+-- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/scan_all_tables_for_corruption.sql
+-- ============================================================================
+
+SET NAMES utf8mb4;
+
+SELECT '============================================================' as '';
+SELECT 'COMPREHENSIVE CORRUPTION SCAN - ALL TABLES' as '';
+SELECT '============================================================' as '';
+SELECT '' as '';
+
+-- Scan all major tables with text content
+SELECT 'Table: addresses' as '';
+SELECT COUNT(*) as corrupted_records, 'name' as column_name FROM addresses WHERE name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'address' FROM addresses WHERE address REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'city' FROM addresses WHERE city REGEXP '[^[:space:]!-~]';
+-- Pattern: any character that is neither whitespace nor printable ASCII (0x21-0x7E); whitespace is excluded so ordinary multi-line text is not reported. NOTE(review): column names are hard-coded - verify them against the schema.
+SELECT '' as '';
+SELECT 'Table: attachments' as '';
+SELECT COUNT(*) as corrupted_records, 'name' as column_name FROM attachments WHERE name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'filename' FROM attachments WHERE filename REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'description' FROM attachments WHERE description REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: contacts' as '';
+SELECT COUNT(*) as corrupted_records, 'name' as column_name FROM contacts WHERE name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'first_name' FROM contacts WHERE first_name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'last_name' FROM contacts WHERE last_name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'notes' FROM contacts WHERE notes REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'job_title' FROM contacts WHERE job_title REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: customers' as '';
+SELECT COUNT(*) as corrupted_records, 'name' as column_name FROM customers WHERE name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'trading_name' FROM customers WHERE trading_name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'notes' FROM customers WHERE notes REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'payment_terms' FROM customers WHERE payment_terms REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: documents' as '';
+SELECT COUNT(*) as corrupted_records, 'cmc_reference' as column_name FROM documents WHERE cmc_reference REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'shipping_details' FROM documents WHERE shipping_details REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'bill_to' FROM documents WHERE bill_to REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'ship_to' FROM documents WHERE ship_to REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'name' FROM documents WHERE name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'subject' FROM documents WHERE subject REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: emails' as '';
+SELECT COUNT(*) as corrupted_records, 'from' as column_name FROM emails WHERE `from` REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'to' FROM emails WHERE `to` REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'subject' FROM emails WHERE subject REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'body' FROM emails WHERE body REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'bcc' FROM emails WHERE bcc REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: enquiries' as '';
+SELECT COUNT(*) as corrupted_records, 'title' as column_name FROM enquiries WHERE title REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'comments' FROM enquiries WHERE comments REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: invoices' as '';
+SELECT COUNT(*) as corrupted_records, 'title' as column_name FROM invoices WHERE title REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'ship_via' FROM invoices WHERE ship_via REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'fob' FROM invoices WHERE fob REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'comments' FROM invoices WHERE comments REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: jobs' as '';
+SELECT COUNT(*) as corrupted_records, 'title' as column_name FROM jobs WHERE title REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'comments' FROM jobs WHERE comments REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: line_items' as '';
+SELECT COUNT(*) as corrupted_records, 'name' as column_name FROM line_items WHERE name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'description' FROM line_items WHERE description REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: order_acknowledgements' as '';
+SELECT COUNT(*) as corrupted_records, 'title' as column_name FROM order_acknowledgements WHERE title REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'comments' FROM order_acknowledgements WHERE comments REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: principles' as '';
+SELECT COUNT(*) as corrupted_records, 'name' as column_name FROM principles WHERE name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'address' FROM principles WHERE address REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'city' FROM principles WHERE city REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'short_name' FROM principles WHERE short_name REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: products' as '';
+SELECT COUNT(*) as corrupted_records, 'name' as column_name FROM products WHERE name REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'title' FROM products WHERE title REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'description' FROM products WHERE description REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'item_description' FROM products WHERE item_description REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'notes' FROM products WHERE notes REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: purchase_orders' as '';
+SELECT COUNT(*) as corrupted_records, 'title' as column_name FROM purchase_orders WHERE title REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'ship_via' FROM purchase_orders WHERE ship_via REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'comments' FROM purchase_orders WHERE comments REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'fob' FROM purchase_orders WHERE fob REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: quotes' as '';
+SELECT COUNT(*) as corrupted_records, 'title' as column_name FROM quotes WHERE title REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'comments' FROM quotes WHERE comments REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'notes' FROM quotes WHERE notes REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT 'Table: shipments' as '';
+SELECT COUNT(*) as corrupted_records, 'title' as column_name FROM shipments WHERE title REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'ship_via' FROM shipments WHERE ship_via REGEXP '[^[:space:]!-~]'
+UNION ALL SELECT COUNT(*), 'comments' FROM shipments WHERE comments REGEXP '[^[:space:]!-~]';
+
+SELECT '' as '';
+SELECT '============================================================' as '';
+SELECT 'SUMMARY - Tables with corruption (>0 records)' as '';
+SELECT '============================================================' as '';
+-- This gives you a quick overview of which tables need fixing