cmc-sales/scripts/report_remaining_corruption.sql

192 lines
4.4 KiB
MySQL
Raw Normal View History

2025-12-09 12:23:22 -08:00
-- ============================================================================
-- Report Remaining Corrupted Data After Fix
-- ============================================================================
-- Run this after fix_corrupted_data.sql to find records that still need manual fixes
--
-- To run: docker exec cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/report_remaining_corruption.sql
-- ============================================================================
SET NAMES utf8mb4;
SELECT '============================================================' as '';
SELECT 'CORRUPTED DATA REPORT' as '';
SELECT '============================================================' as '';
-- Summary counts
SELECT '' as '';
SELECT 'SUMMARY OF REMAINING CORRUPTION' as '';
SELECT '-------------------------------' as '';
SELECT
'principles' as table_name,
COUNT(*) as corrupted_records,
'name, address, city' as affected_columns
FROM principles
WHERE name REGEXP '[^ -~]'
OR address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT
'customers',
COUNT(*),
'name'
FROM customers
WHERE name REGEXP '[^ -~]'
UNION ALL
SELECT
'addresses',
COUNT(*),
'address, city'
FROM addresses
WHERE address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT
'contacts',
COUNT(*),
'name'
FROM contacts
WHERE name REGEXP '[^ -~]'
UNION ALL
SELECT
'products',
COUNT(*),
'name, description'
FROM products
WHERE name REGEXP '[^ -~]'
OR description REGEXP '[^ -~]';
-- Detailed listings
SELECT '' as '';
SELECT '============================================================' as '';
SELECT 'DETAILED CORRUPTED RECORDS' as '';
SELECT '============================================================' as '';
-- Principles with corruption
SELECT '' as '';
SELECT 'PRINCIPLES TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
name,
LEFT(address, 50) as address_preview,
city
FROM principles
WHERE name REGEXP '[^ -~]'
OR address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
ORDER BY id;
-- Customers with corruption
SELECT '' as '';
SELECT 'CUSTOMERS TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
name
FROM customers
WHERE name REGEXP '[^ -~]'
ORDER BY id;
-- Addresses with corruption
SELECT '' as '';
SELECT 'ADDRESSES TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
LEFT(address, 60) as address,
city
FROM addresses
WHERE address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
ORDER BY id
LIMIT 50;
-- Contacts with corruption
SELECT '' as '';
SELECT 'CONTACTS TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
name
FROM contacts
WHERE name REGEXP '[^ -~]'
ORDER BY id
LIMIT 50;
-- Products with corruption
SELECT '' as '';
SELECT 'PRODUCTS TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
name,
LEFT(description, 50) as description_preview
FROM products
WHERE name REGEXP '[^ -~]'
OR description REGEXP '[^ -~]'
ORDER BY id
LIMIT 50;
-- Common corruption patterns found
SELECT '' as '';
SELECT '============================================================' as '';
SELECT 'COMMON CORRUPTION PATTERNS DETECTED' as '';
SELECT '============================================================' as '';
SELECT
'’' as mojibake_pattern,
'Smart apostrophe (')' as should_be,
COUNT(*) as occurrences_in_customers
FROM customers
WHERE name LIKE '%’%'
UNION ALL
SELECT
'“ or â€',
'Smart quotes (" ")',
COUNT(*)
FROM customers
WHERE name LIKE '%“%' OR name LIKE '%â€%'
UNION ALL
SELECT
'é, ü, ó, etc',
'Accented chars (é, ü, ó)',
COUNT(*)
FROM customers
WHERE name REGEXP 'Ã[©¼³±§]'
UNION ALL
SELECT
'â€" or â€"',
'Dashes ( —)',
COUNT(*)
FROM addresses
WHERE address LIKE '%â€"%' OR address LIKE '%â€"%'
UNION ALL
SELECT
'Ä, ž, Å¡, etc',
'Czech/Polish chars',
COUNT(*)
FROM principles
WHERE address REGEXP '[ÄÅ][¾¡›™]';
SELECT '' as '';
SELECT '============================================================' as '';
SELECT 'END OF REPORT' as '';
SELECT '============================================================' as '';