192 lines
4.4 KiB
MySQL
192 lines
4.4 KiB
MySQL
|
|
-- ============================================================================
|
|||
|
|
-- Report Remaining Corrupted Data After Fix
|
|||
|
|
-- ============================================================================
|
|||
|
|
-- Run this after fix_corrupted_data.sql to find records that still need manual fixes
|
|||
|
|
--
|
|||
|
|
-- To run: docker exec cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/report_remaining_corruption.sql
|
|||
|
|
-- ============================================================================
|
|||
|
|
|
|||
|
|
SET NAMES utf8mb4;
|
|||
|
|
|
|||
|
|
SELECT '============================================================' as '';
|
|||
|
|
SELECT 'CORRUPTED DATA REPORT' as '';
|
|||
|
|
SELECT '============================================================' as '';
|
|||
|
|
|
|||
|
|
-- Summary counts
|
|||
|
|
SELECT '' as '';
|
|||
|
|
SELECT 'SUMMARY OF REMAINING CORRUPTION' as '';
|
|||
|
|
SELECT '-------------------------------' as '';
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'principles' as table_name,
|
|||
|
|
COUNT(*) as corrupted_records,
|
|||
|
|
'name, address, city' as affected_columns
|
|||
|
|
FROM principles
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
OR address REGEXP '[^ -~]'
|
|||
|
|
OR city REGEXP '[^ -~]'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'customers',
|
|||
|
|
COUNT(*),
|
|||
|
|
'name'
|
|||
|
|
FROM customers
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'addresses',
|
|||
|
|
COUNT(*),
|
|||
|
|
'address, city'
|
|||
|
|
FROM addresses
|
|||
|
|
WHERE address REGEXP '[^ -~]'
|
|||
|
|
OR city REGEXP '[^ -~]'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'contacts',
|
|||
|
|
COUNT(*),
|
|||
|
|
'name'
|
|||
|
|
FROM contacts
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'products',
|
|||
|
|
COUNT(*),
|
|||
|
|
'name, description'
|
|||
|
|
FROM products
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
OR description REGEXP '[^ -~]';
|
|||
|
|
|
|||
|
|
-- Detailed listings
|
|||
|
|
SELECT '' as '';
|
|||
|
|
SELECT '============================================================' as '';
|
|||
|
|
SELECT 'DETAILED CORRUPTED RECORDS' as '';
|
|||
|
|
SELECT '============================================================' as '';
|
|||
|
|
|
|||
|
|
-- Principles with corruption
|
|||
|
|
SELECT '' as '';
|
|||
|
|
SELECT 'PRINCIPLES TABLE - Corrupted Records:' as '';
|
|||
|
|
SELECT '-------------------------------------' as '';
|
|||
|
|
SELECT
|
|||
|
|
id,
|
|||
|
|
name,
|
|||
|
|
LEFT(address, 50) as address_preview,
|
|||
|
|
city
|
|||
|
|
FROM principles
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
OR address REGEXP '[^ -~]'
|
|||
|
|
OR city REGEXP '[^ -~]'
|
|||
|
|
ORDER BY id;
|
|||
|
|
|
|||
|
|
-- Customers with corruption
|
|||
|
|
SELECT '' as '';
|
|||
|
|
SELECT 'CUSTOMERS TABLE - Corrupted Records:' as '';
|
|||
|
|
SELECT '-------------------------------------' as '';
|
|||
|
|
SELECT
|
|||
|
|
id,
|
|||
|
|
name
|
|||
|
|
FROM customers
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
ORDER BY id;
|
|||
|
|
|
|||
|
|
-- Addresses with corruption
|
|||
|
|
SELECT '' as '';
|
|||
|
|
SELECT 'ADDRESSES TABLE - Corrupted Records:' as '';
|
|||
|
|
SELECT '-------------------------------------' as '';
|
|||
|
|
SELECT
|
|||
|
|
id,
|
|||
|
|
LEFT(address, 60) as address,
|
|||
|
|
city
|
|||
|
|
FROM addresses
|
|||
|
|
WHERE address REGEXP '[^ -~]'
|
|||
|
|
OR city REGEXP '[^ -~]'
|
|||
|
|
ORDER BY id
|
|||
|
|
LIMIT 50;
|
|||
|
|
|
|||
|
|
-- Contacts with corruption
|
|||
|
|
SELECT '' as '';
|
|||
|
|
SELECT 'CONTACTS TABLE - Corrupted Records:' as '';
|
|||
|
|
SELECT '-------------------------------------' as '';
|
|||
|
|
SELECT
|
|||
|
|
id,
|
|||
|
|
name
|
|||
|
|
FROM contacts
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
ORDER BY id
|
|||
|
|
LIMIT 50;
|
|||
|
|
|
|||
|
|
-- Products with corruption
|
|||
|
|
SELECT '' as '';
|
|||
|
|
SELECT 'PRODUCTS TABLE - Corrupted Records:' as '';
|
|||
|
|
SELECT '-------------------------------------' as '';
|
|||
|
|
SELECT
|
|||
|
|
id,
|
|||
|
|
name,
|
|||
|
|
LEFT(description, 50) as description_preview
|
|||
|
|
FROM products
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
OR description REGEXP '[^ -~]'
|
|||
|
|
ORDER BY id
|
|||
|
|
LIMIT 50;
|
|||
|
|
|
|||
|
|
-- Common corruption patterns found
|
|||
|
|
SELECT '' as '';
|
|||
|
|
SELECT '============================================================' as '';
|
|||
|
|
SELECT 'COMMON CORRUPTION PATTERNS DETECTED' as '';
|
|||
|
|
SELECT '============================================================' as '';
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'’' as mojibake_pattern,
|
|||
|
|
'Smart apostrophe (')' as should_be,
|
|||
|
|
COUNT(*) as occurrences_in_customers
|
|||
|
|
FROM customers
|
|||
|
|
WHERE name LIKE '%’%'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'“ or â€',
|
|||
|
|
'Smart quotes (" ")',
|
|||
|
|
COUNT(*)
|
|||
|
|
FROM customers
|
|||
|
|
WHERE name LIKE '%“%' OR name LIKE '%â€%'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'é, ü, ó, etc',
|
|||
|
|
'Accented chars (é, ü, ó)',
|
|||
|
|
COUNT(*)
|
|||
|
|
FROM customers
|
|||
|
|
WHERE name REGEXP 'Ã[©¼³±§]'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'â€" or â€"',
|
|||
|
|
'Dashes (– —)',
|
|||
|
|
COUNT(*)
|
|||
|
|
FROM addresses
|
|||
|
|
WHERE address LIKE '%â€"%' OR address LIKE '%â€"%'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT
|
|||
|
|
'Ä, ž, Å¡, etc',
|
|||
|
|
'Czech/Polish chars',
|
|||
|
|
COUNT(*)
|
|||
|
|
FROM principles
|
|||
|
|
WHERE address REGEXP '[ÄÅ][¾¡›™]';
|
|||
|
|
|
|||
|
|
SELECT '' as '';
|
|||
|
|
SELECT '============================================================' as '';
|
|||
|
|
SELECT 'END OF REPORT' as '';
|
|||
|
|
SELECT '============================================================' as '';
|