192 lines
4.4 KiB
SQL
192 lines
4.4 KiB
SQL
-- ============================================================================
|
||
-- Report Remaining Corrupted Data After Fix
|
||
-- ============================================================================
|
||
-- Run this after fix_corrupted_data.sql to find records that still need manual fixes
|
||
--
|
||
-- To run: docker exec cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/report_remaining_corruption.sql
|
||
-- ============================================================================
|
||
|
||
SET NAMES utf8mb4;
|
||
|
||
SELECT '============================================================' as '';
|
||
SELECT 'CORRUPTED DATA REPORT' as '';
|
||
SELECT '============================================================' as '';
|
||
|
||
-- Summary counts
|
||
SELECT '' as '';
|
||
SELECT 'SUMMARY OF REMAINING CORRUPTION' as '';
|
||
SELECT '-------------------------------' as '';
|
||
|
||
SELECT
|
||
'principles' as table_name,
|
||
COUNT(*) as corrupted_records,
|
||
'name, address, city' as affected_columns
|
||
FROM principles
|
||
WHERE name REGEXP '[^ -~]'
|
||
OR address REGEXP '[^ -~]'
|
||
OR city REGEXP '[^ -~]'
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'customers',
|
||
COUNT(*),
|
||
'name'
|
||
FROM customers
|
||
WHERE name REGEXP '[^ -~]'
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'addresses',
|
||
COUNT(*),
|
||
'address, city'
|
||
FROM addresses
|
||
WHERE address REGEXP '[^ -~]'
|
||
OR city REGEXP '[^ -~]'
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'contacts',
|
||
COUNT(*),
|
||
'name'
|
||
FROM contacts
|
||
WHERE name REGEXP '[^ -~]'
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'products',
|
||
COUNT(*),
|
||
'name, description'
|
||
FROM products
|
||
WHERE name REGEXP '[^ -~]'
|
||
OR description REGEXP '[^ -~]';
|
||
|
||
-- Detailed listings
|
||
SELECT '' as '';
|
||
SELECT '============================================================' as '';
|
||
SELECT 'DETAILED CORRUPTED RECORDS' as '';
|
||
SELECT '============================================================' as '';
|
||
|
||
-- Principles with corruption
|
||
SELECT '' as '';
|
||
SELECT 'PRINCIPLES TABLE - Corrupted Records:' as '';
|
||
SELECT '-------------------------------------' as '';
|
||
SELECT
|
||
id,
|
||
name,
|
||
LEFT(address, 50) as address_preview,
|
||
city
|
||
FROM principles
|
||
WHERE name REGEXP '[^ -~]'
|
||
OR address REGEXP '[^ -~]'
|
||
OR city REGEXP '[^ -~]'
|
||
ORDER BY id;
|
||
|
||
-- Customers with corruption
|
||
SELECT '' as '';
|
||
SELECT 'CUSTOMERS TABLE - Corrupted Records:' as '';
|
||
SELECT '-------------------------------------' as '';
|
||
SELECT
|
||
id,
|
||
name
|
||
FROM customers
|
||
WHERE name REGEXP '[^ -~]'
|
||
ORDER BY id;
|
||
|
||
-- Addresses with corruption
|
||
SELECT '' as '';
|
||
SELECT 'ADDRESSES TABLE - Corrupted Records:' as '';
|
||
SELECT '-------------------------------------' as '';
|
||
SELECT
|
||
id,
|
||
LEFT(address, 60) as address,
|
||
city
|
||
FROM addresses
|
||
WHERE address REGEXP '[^ -~]'
|
||
OR city REGEXP '[^ -~]'
|
||
ORDER BY id
|
||
LIMIT 50;
|
||
|
||
-- Contacts with corruption
|
||
SELECT '' as '';
|
||
SELECT 'CONTACTS TABLE - Corrupted Records:' as '';
|
||
SELECT '-------------------------------------' as '';
|
||
SELECT
|
||
id,
|
||
name
|
||
FROM contacts
|
||
WHERE name REGEXP '[^ -~]'
|
||
ORDER BY id
|
||
LIMIT 50;
|
||
|
||
-- Products with corruption
|
||
SELECT '' as '';
|
||
SELECT 'PRODUCTS TABLE - Corrupted Records:' as '';
|
||
SELECT '-------------------------------------' as '';
|
||
SELECT
|
||
id,
|
||
name,
|
||
LEFT(description, 50) as description_preview
|
||
FROM products
|
||
WHERE name REGEXP '[^ -~]'
|
||
OR description REGEXP '[^ -~]'
|
||
ORDER BY id
|
||
LIMIT 50;
|
||
|
||
-- Common corruption patterns found
|
||
SELECT '' as '';
|
||
SELECT '============================================================' as '';
|
||
SELECT 'COMMON CORRUPTION PATTERNS DETECTED' as '';
|
||
SELECT '============================================================' as '';
|
||
|
||
SELECT
|
||
'’' as mojibake_pattern,
|
||
'Smart apostrophe (')' as should_be,
|
||
COUNT(*) as occurrences_in_customers
|
||
FROM customers
|
||
WHERE name LIKE '%’%'
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'“ or â€',
|
||
'Smart quotes (" ")',
|
||
COUNT(*)
|
||
FROM customers
|
||
WHERE name LIKE '%“%' OR name LIKE '%â€%'
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'é, ü, ó, etc',
|
||
'Accented chars (é, ü, ó)',
|
||
COUNT(*)
|
||
FROM customers
|
||
WHERE name REGEXP 'Ã[©¼³±§]'
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'â€" or â€"',
|
||
'Dashes (– —)',
|
||
COUNT(*)
|
||
FROM addresses
|
||
WHERE address LIKE '%â€"%' OR address LIKE '%â€"%'
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'Ä, ž, Å¡, etc',
|
||
'Czech/Polish chars',
|
||
COUNT(*)
|
||
FROM principles
|
||
WHERE address REGEXP '[ÄÅ][¾¡›™]';
|
||
|
||
SELECT '' as '';
|
||
SELECT '============================================================' as '';
|
||
SELECT 'END OF REPORT' as '';
|
||
SELECT '============================================================' as '';
|