cmc-sales/scripts/report_remaining_corruption.sql
2025-12-10 07:27:36 +11:00

192 lines
4.4 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- ============================================================================
-- Report Remaining Corrupted Data After Fix
-- ============================================================================
-- Run this after fix_corrupted_data.sql to find records that still need manual fixes
--
-- To run: docker exec cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/report_remaining_corruption.sql
-- ============================================================================
SET NAMES utf8mb4;
SELECT '============================================================' as '';
SELECT 'CORRUPTED DATA REPORT' as '';
SELECT '============================================================' as '';
-- Summary counts
SELECT '' as '';
SELECT 'SUMMARY OF REMAINING CORRUPTION' as '';
SELECT '-------------------------------' as '';
SELECT
'principles' as table_name,
COUNT(*) as corrupted_records,
'name, address, city' as affected_columns
FROM principles
WHERE name REGEXP '[^ -~]'
OR address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT
'customers',
COUNT(*),
'name'
FROM customers
WHERE name REGEXP '[^ -~]'
UNION ALL
SELECT
'addresses',
COUNT(*),
'address, city'
FROM addresses
WHERE address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT
'contacts',
COUNT(*),
'name'
FROM contacts
WHERE name REGEXP '[^ -~]'
UNION ALL
SELECT
'products',
COUNT(*),
'name, description'
FROM products
WHERE name REGEXP '[^ -~]'
OR description REGEXP '[^ -~]';
-- Detailed listings
SELECT '' as '';
SELECT '============================================================' as '';
SELECT 'DETAILED CORRUPTED RECORDS' as '';
SELECT '============================================================' as '';
-- Principles with corruption
SELECT '' as '';
SELECT 'PRINCIPLES TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
name,
LEFT(address, 50) as address_preview,
city
FROM principles
WHERE name REGEXP '[^ -~]'
OR address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
ORDER BY id;
-- Customers with corruption
SELECT '' as '';
SELECT 'CUSTOMERS TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
name
FROM customers
WHERE name REGEXP '[^ -~]'
ORDER BY id;
-- Addresses with corruption
SELECT '' as '';
SELECT 'ADDRESSES TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
LEFT(address, 60) as address,
city
FROM addresses
WHERE address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
ORDER BY id
LIMIT 50;
-- Contacts with corruption
SELECT '' as '';
SELECT 'CONTACTS TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
name
FROM contacts
WHERE name REGEXP '[^ -~]'
ORDER BY id
LIMIT 50;
-- Products with corruption
SELECT '' as '';
SELECT 'PRODUCTS TABLE - Corrupted Records:' as '';
SELECT '-------------------------------------' as '';
SELECT
id,
name,
LEFT(description, 50) as description_preview
FROM products
WHERE name REGEXP '[^ -~]'
OR description REGEXP '[^ -~]'
ORDER BY id
LIMIT 50;
-- Common corruption patterns found
SELECT '' as '';
SELECT '============================================================' as '';
SELECT 'COMMON CORRUPTION PATTERNS DETECTED' as '';
SELECT '============================================================' as '';
SELECT
'’' as mojibake_pattern,
'Smart apostrophe (')' as should_be,
COUNT(*) as occurrences_in_customers
FROM customers
WHERE name LIKE '%â%'
UNION ALL
SELECT
'✠or â',
'Smart quotes (" ")',
COUNT(*)
FROM customers
WHERE name LIKE '%âœ%' OR name LIKE '%â%'
UNION ALL
SELECT
'é, ü, ó, etc',
'Accented chars (é, ü, ó)',
COUNT(*)
FROM customers
WHERE name REGEXP 'Ã[©¼³±§]'
UNION ALL
SELECT
'â" or â€"',
'Dashes ( )',
COUNT(*)
FROM addresses
WHERE address LIKE '%â"%' OR address LIKE '%â€"%'
UNION ALL
SELECT
'Ä, ž, Å¡, etc',
'Czech/Polish chars',
COUNT(*)
FROM principles
WHERE address REGEXP '[ÄÅ][¾¡]';
SELECT '' as '';
SELECT '============================================================' as '';
SELECT 'END OF REPORT' as '';
SELECT '============================================================' as '';