-- ============================================================================ -- Fix Corrupted Character Data in CMC Database -- ============================================================================ -- This script fixes mojibake (corrupted UTF-8 characters) in the database -- Run after converting tables to utf8mb4 -- -- IMPORTANT: Review this script and test on a backup first! -- Create backup: docker exec cmc-db mariadb-dump -u root -psecureRootPassword cmc | gzip > backup_before_fix.sql.gz -- -- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_corrupted_data.sql -- ============================================================================ -- Create a backup table for principles (optional safety measure) -- DROP TABLE IF EXISTS principles_backup; -- CREATE TABLE principles_backup AS SELECT * FROM principles; SET NAMES utf8mb4; -- ============================================================================ -- PART 1: Global Pattern Fixes Using REPLACE -- ============================================================================ -- These fix common mojibake patterns across all affected tables -- ---------------------------------------------------------------------------- -- Fix CUSTOMERS table -- ---------------------------------------------------------------------------- UPDATE customers SET name = REPLACE(name, '’', ''') WHERE name LIKE '%’%'; -- Smart apostrophe UPDATE customers SET name = REPLACE(name, '“', '"') WHERE name LIKE '%“%'; -- Left double quote UPDATE customers SET name = REPLACE(name, 'â€', '"') WHERE name LIKE '%â€%'; -- Right double quote UPDATE customers SET name = REPLACE(name, 'â€"', '–') WHERE name LIKE '%â€"%'; -- En dash UPDATE customers SET name = REPLACE(name, 'â€"', '—') WHERE name LIKE '%â€"%'; -- Em dash UPDATE customers SET name = REPLACE(name, '​', '') WHERE name LIKE '%​%'; -- Zero-width space (remove) UPDATE customers SET name = REPLACE(name, ' ', ' ') WHERE name LIKE '% %'; -- Special space UPDATE customers SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%'; -- é UPDATE customers SET name = REPLACE(name, 'É', 'É') WHERE name LIKE '%É%'; -- É UPDATE customers SET name = REPLACE(name, 'ó', 'ó') WHERE name LIKE '%ó%'; -- ó UPDATE customers SET name = REPLACE(name, 'Ã', 'í') WHERE name LIKE '%Ã%'; -- í UPDATE customers SET name = REPLACE(name, 'ç', 'ç') WHERE name LIKE '%ç%'; -- ç UPDATE customers SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%'; -- ü UPDATE customers SET name = REPLACE(name, 'á', 'á') WHERE name LIKE '%á%'; -- á UPDATE customers SET name = REPLACE(name, 'ñ', 'ñ') WHERE name LIKE '%ñ%'; -- ñ UPDATE customers SET name = REPLACE(name, 'ö', 'ö') WHERE name LIKE '%ö%'; -- ö UPDATE customers SET name = REPLACE(name, 'ô', 'ô') WHERE name LIKE '%ô%'; -- ô UPDATE customers SET name = REPLACE(name, 'İ', 'İ') WHERE name LIKE '%İ%'; -- Turkish İ UPDATE customers SET name = REPLACE(name, 'Å', 'Ş') WHERE name LIKE '%Å%'; -- Turkish Ş UPDATE customers SET name = REPLACE(name, 'Ä', 'Ğ') WHERE name LIKE '%Ä%'; -- Turkish Ğ UPDATE customers SET name = REPLACE(name, 'Ä', 'Č') WHERE name LIKE '%Ä%'; -- Czech Č UPDATE customers SET name = REPLACE(name, 'Ä›', 'ě') WHERE name LIKE '%Ä›%'; -- Czech ě -- Remove trailing tabs UPDATE customers SET name = TRIM(TRAILING '\t' FROM name) WHERE name LIKE '%\t'; UPDATE customers SET trading_name = TRIM(TRAILING '\t' FROM trading_name) WHERE trading_name LIKE '%\t'; -- ---------------------------------------------------------------------------- -- Fix PRINCIPLES table -- ---------------------------------------------------------------------------- UPDATE principles SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%'; UPDATE principles SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%'; UPDATE principles SET name = REPLACE(name, 'ó', 'ó') WHERE name LIKE '%ó%'; UPDATE principles SET address = REPLACE(address, 'ß', 'ß') WHERE address LIKE '%ß%'; -- German ß UPDATE principles SET address = REPLACE(address, 'ü', 'ü') WHERE address LIKE '%ü%'; UPDATE principles SET address = REPLACE(address, 'é', 'é') WHERE address LIKE '%é%'; UPDATE principles SET address = REPLACE(address, 'ñ', 'ñ') WHERE address LIKE '%ñ%'; UPDATE principles SET address = REPLACE(address, 'Ä', 'č') WHERE address LIKE '%Ä%'; -- Czech č UPDATE principles SET address = REPLACE(address, 'ž', 'ž') WHERE address LIKE '%ž%'; -- Czech ž UPDATE principles SET address = REPLACE(address, 'Å¡', 'š') WHERE address LIKE '%Å¡%'; -- Czech š UPDATE principles SET address = REPLACE(address, 'Ä›', 'ě') WHERE address LIKE '%Ä›%'; -- Czech ě UPDATE principles SET address = REPLACE(address, 'Å', 'ň') WHERE address LIKE '%Å%'; -- Czech ň UPDATE principles SET address = REPLACE(address, 'Å™', 'ř') WHERE address LIKE '%Å™%'; -- Czech ř UPDATE principles SET city = REPLACE(city, 'ü', 'ü') WHERE city LIKE '%ü%'; UPDATE principles SET city = REPLACE(city, 'é', 'é') WHERE city LIKE '%é%'; UPDATE principles SET city = REPLACE(city, 'ó', 'ó') WHERE city LIKE '%ó%'; -- ---------------------------------------------------------------------------- -- Fix ADDRESSES table -- ---------------------------------------------------------------------------- UPDATE addresses SET address = REPLACE(address, 'â€"', '–') WHERE address LIKE '%â€"%'; -- En dash UPDATE addresses SET address = REPLACE(address, 'â€"', '—') WHERE address LIKE '%â€"%'; -- Em dash UPDATE addresses SET address = REPLACE(address, 'é', 'é') WHERE address LIKE '%é%'; UPDATE addresses SET address = REPLACE(address, 'ü', 'ü') WHERE address LIKE '%ü%'; UPDATE addresses SET city = REPLACE(city, 'é', 'é') WHERE city LIKE '%é%'; UPDATE addresses SET city = REPLACE(city, 'ü', 'ü') WHERE city LIKE '%ü%'; -- ============================================================================ -- PART 2: Specific Record Fixes -- ============================================================================ -- These fix specific known corrupted records where pattern matching isn't enough -- ---------------------------------------------------------------------------- -- PRINCIPLES - Specific fixes -- ---------------------------------------------------------------------------- -- ID 2: German ß UPDATE principles SET address = 'Heinz-Fangman-Straße 18' WHERE id = 2; -- ID 9: Czech characters (this is complex, needs full correction) UPDATE principles SET address = 'Bezručova 2901\n756 61 Rožnov pod Radhoštěm', city = 'Rožnov pod Radhoštěm' WHERE id = 9; -- ID 13: German ü UPDATE principles SET name = 'IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH' WHERE id = 13; -- ID 14: German ß UPDATE principles SET address = 'Liebigstraße 2' WHERE id = 14; -- ID 58: Spanish ó UPDATE principles SET name = 'Nosia S.r.l.- Señalización Industrial' WHERE id = 58; -- ID 65: German ß UPDATE principles SET address = 'Alte Emser Straße 32' WHERE id = 65; -- ---------------------------------------------------------------------------- -- CUSTOMERS - Specific fixes -- ---------------------------------------------------------------------------- -- ID 253: É UPDATE customers SET name = 'SOLUZÉ CIVIL ENGINEERS Trading Under SOLUZE PTY LTD' WHERE id = 253; -- ID 1006: Smart apostrophe UPDATE customers SET name = 'Dr. Prem''s Molecules Private Limited (DPMolecules)' WHERE id = 1006; -- ID 1387: Smart quotes UPDATE customers SET name = 'DEE ENTERPRISES (QLD) PTY LTD trading as "MEKLEK"' WHERE id = 1387; -- ID 1608: Smart apostrophe UPDATE customers SET name = 'Guidera O''Connor Pty Ltd' WHERE id = 1608; -- ID 2174: Zero-width space UPDATE customers SET name = 'Ingredion ANZ Pty Ltd' WHERE id = 2174; -- ID 2215: French é UPDATE customers SET name = 'Vale Nouvelle-Calédonie S.A.S' WHERE id = 2215; -- ID 2375: Spanish ó UPDATE customers SET name = 'Evaluación y Control Ambiental S.A.S.' WHERE id = 2375; -- ID 3143: Portuguese ç and ó UPDATE customers SET name = 'Zontahevy Comércio e Serviços Offshore Ltda' WHERE id = 3143; -- ID 3529: French é UPDATE customers SET name = 'Société des Mines de Syama' WHERE id = 3529; -- ID 3633: Special space (em space) UPDATE customers SET name = 'P.J. Berriman & Co PTY LTD' WHERE id = 3633; -- ID 4325: Danish ø UPDATE customers SET name = 'Rambøll Danmark' WHERE id = 4325; -- ID 4350: Turkish characters İ, Ş, Ğ UPDATE customers SET name = 'SONNIVA ENERGY MAKİNA İNŞAAT İTH. İHR. LTD. ŞTİ.' WHERE id = 4350; -- ID 4669: French é UPDATE customers SET name = 'F.C.C. Fluides Conseils Calédonie SARL' WHERE id = 4669; -- ID 4743: Czech ě UPDATE customers SET name = 'DGPack Prostějov' WHERE id = 4743; -- ID 4764: Zero-width space UPDATE customers SET name = 'Mccready Welding Pty Ltd Trading under the entity Mccready''s Welding Services' WHERE id = 4764; -- ID 4893: Spanish í UPDATE customers SET name = 'Oxiquímica, S.A.P.I. de C.V.' WHERE id = 4893; -- ---------------------------------------------------------------------------- -- ADDRESSES - Specific fixes -- ---------------------------------------------------------------------------- -- ID 19: En dash (7–11) UPDATE addresses SET address = 'Lvl 3, Building B, 7–11 Talavera Road' WHERE id = 19; -- ============================================================================ -- PART 3: Verification Queries -- ============================================================================ -- Run these after the fixes to verify they worked -- Check fixed principles SELECT id, name, city FROM principles WHERE id IN (2, 9, 13, 14, 58, 65); -- Check fixed customers SELECT id, name FROM customers WHERE id IN (253, 1006, 1387, 1608, 2174, 2215, 2375, 3143, 3529, 4325, 4350, 4669, 4743, 4764, 4893); -- Check fixed addresses SELECT id, address FROM addresses WHERE id = 19; -- Count remaining corrupted records (should be much lower) SELECT COUNT(*) as remaining_corrupted_principles FROM principles WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]' OR city REGEXP '[^ -~]'; SELECT COUNT(*) as remaining_corrupted_customers FROM customers WHERE name REGEXP '[^ -~]'; SELECT COUNT(*) as remaining_corrupted_addresses FROM addresses WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]'; -- ============================================================================ -- COMPLETION -- ============================================================================ SELECT 'Fix script completed! Review the verification queries above.' as status;