-- ============================================================================ -- Fix Corrupted Character Data in CMC Database (Using HEX encoding) -- ============================================================================ -- This script fixes mojibake using HEX encoding to avoid UTF-8 interpretation issues -- Run after converting tables to utf8mb4 -- -- IMPORTANT: Review this script and test on a backup first! -- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_corrupted_data_hex.sql -- ============================================================================ SET NAMES utf8mb4; -- ============================================================================ -- PART 1: Fix CUSTOMERS table using HEX encoding -- ============================================================================ -- Smart apostrophe: ’ → ' UPDATE customers SET name = REPLACE(name, UNHEX('E28099'), UNHEX('E28099')) WHERE name LIKE CONCAT('%', UNHEX('E28099'), '%'); -- Actually, let's use the simpler binary approach UPDATE customers SET name = REPLACE(name, _binary 0xE28099, '''') WHERE name LIKE _binary CONCAT('%', 0xE28099, '%'); -- Left double quote: “ → " UPDATE customers SET name = REPLACE(name, _binary 0xE2809C, '"') WHERE name LIKE _binary CONCAT('%', 0xE2809C, '%'); -- Right double quote: †→ " UPDATE customers SET name = REPLACE(name, _binary 0xE2809D, '"') WHERE name LIKE _binary CONCAT('%', 0xE2809D, '%'); -- En dash: â€" → – UPDATE customers SET name = REPLACE(name, _binary 0xE28093, '–') WHERE name LIKE _binary CONCAT('%', 0xE28093, '%'); -- Em dash: â€" → — UPDATE customers SET name = REPLACE(name, _binary 0xE28094, '—') WHERE name LIKE _binary CONCAT('%', 0xE28094, '%'); -- Zero-width space: ​ → (remove) UPDATE customers SET name = REPLACE(name, _binary 0xE2808B, '') WHERE name LIKE _binary CONCAT('%', 0xE2808B, '%'); -- Em space:   → (regular space) UPDATE customers SET name = REPLACE(name, _binary 0xE28083, ' ') WHERE name LIKE _binary CONCAT('%', 0xE28083, '%'); -- Accented é: é → é UPDATE customers SET name = REPLACE(name, _binary 0xC3A9, 'é') WHERE name LIKE _binary CONCAT('%', 0xC3A9, '%'); -- Accented É: É → É UPDATE customers SET name = REPLACE(name, _binary 0xC389, 'É') WHERE name LIKE _binary CONCAT('%', 0xC389, '%'); -- Accented ó: ó → ó UPDATE customers SET name = REPLACE(name, _binary 0xC3B3, 'ó') WHERE name LIKE _binary CONCAT('%', 0xC3B3, '%'); -- Accented í: à → í UPDATE customers SET name = REPLACE(name, _binary 0xC3AD, 'í') WHERE name LIKE _binary CONCAT('%', 0xC3AD, '%'); -- Accented ç: ç → ç UPDATE customers SET name = REPLACE(name, _binary 0xC3A7, 'ç') WHERE name LIKE _binary CONCAT('%', 0xC3A7, '%'); -- Accented ü: ü → ü UPDATE customers SET name = REPLACE(name, _binary 0xC3BC, 'ü') WHERE name LIKE _binary CONCAT('%', 0xC3BC, '%'); -- Accented á: á → á UPDATE customers SET name = REPLACE(name, _binary 0xC3A1, 'á') WHERE name LIKE _binary CONCAT('%', 0xC3A1, '%'); -- Accented ñ: ñ → ñ UPDATE customers SET name = REPLACE(name, _binary 0xC3B1, 'ñ') WHERE name LIKE _binary CONCAT('%', 0xC3B1, '%'); -- Accented ö: ö → ö UPDATE customers SET name = REPLACE(name, _binary 0xC3B6, 'ö') WHERE name LIKE _binary CONCAT('%', 0xC3B6, '%'); -- Accented ô: ô → ô UPDATE customers SET name = REPLACE(name, _binary 0xC3B4, 'ô') WHERE name LIKE _binary CONCAT('%', 0xC3B4, '%'); -- Turkish İ: İ → İ UPDATE customers SET name = REPLACE(name, _binary 0xC4B0, 'İ') WHERE name LIKE _binary CONCAT('%', 0xC4B0, '%'); -- Turkish Ş: Å → Ş UPDATE customers SET name = REPLACE(name, _binary 0xC59E, 'Ş') WHERE name LIKE _binary CONCAT('%', 0xC59E, '%'); -- Turkish Ğ: Ä → Ğ UPDATE customers SET name = REPLACE(name, _binary 0xC49E, 'Ğ') WHERE name LIKE _binary CONCAT('%', 0xC49E, '%'); -- Czech Č: Ä → Č UPDATE customers SET name = REPLACE(name, _binary 0xC48C, 'Č') WHERE name LIKE _binary CONCAT('%', 0xC48C, '%'); -- Czech ě: Ä› → ě UPDATE customers SET name = REPLACE(name, _binary 0xC49B, 'ě') WHERE name LIKE _binary CONCAT('%', 0xC49B, '%'); -- Remove trailing tabs UPDATE customers SET name = TRIM(TRAILING '\t' FROM name) WHERE name LIKE '%\t'; UPDATE customers SET trading_name = TRIM(TRAILING '\t' FROM trading_name) WHERE trading_name LIKE '%\t'; -- ============================================================================ -- PART 2: Fix PRINCIPLES table -- ============================================================================ -- Accented ü: ü → ü UPDATE principles SET name = REPLACE(name, _binary 0xC3BC, 'ü') WHERE name LIKE _binary CONCAT('%', 0xC3BC, '%'); UPDATE principles SET address = REPLACE(address, _binary 0xC3BC, 'ü') WHERE address LIKE _binary CONCAT('%', 0xC3BC, '%'); UPDATE principles SET city = REPLACE(city, _binary 0xC3BC, 'ü') WHERE city LIKE _binary CONCAT('%', 0xC3BC, '%'); -- Accented é: é → é UPDATE principles SET name = REPLACE(name, _binary 0xC3A9, 'é') WHERE name LIKE _binary CONCAT('%', 0xC3A9, '%'); UPDATE principles SET address = REPLACE(address, _binary 0xC3A9, 'é') WHERE address LIKE _binary CONCAT('%', 0xC3A9, '%'); UPDATE principles SET city = REPLACE(city, _binary 0xC3A9, 'é') WHERE city LIKE _binary CONCAT('%', 0xC3A9, '%'); -- Accented ó: ó → ó UPDATE principles SET name = REPLACE(name, _binary 0xC3B3, 'ó') WHERE name LIKE _binary CONCAT('%', 0xC3B3, '%'); UPDATE principles SET city = REPLACE(city, _binary 0xC3B3, 'ó') WHERE city LIKE _binary CONCAT('%', 0xC3B3, '%'); -- German ß: ß → ß UPDATE principles SET address = REPLACE(address, _binary 0xC39F, 'ß') WHERE address LIKE _binary CONCAT('%', 0xC39F, '%'); -- Accented ñ: ñ → ñ UPDATE principles SET address = REPLACE(address, _binary 0xC3B1, 'ñ') WHERE address LIKE _binary CONCAT('%', 0xC3B1, '%'); -- Czech č: Ä → č UPDATE principles SET address = REPLACE(address, _binary 0xC48D, 'č') WHERE address LIKE _binary CONCAT('%', 0xC48D, '%'); -- Czech ž: ž → ž UPDATE principles SET address = REPLACE(address, _binary 0xC5BE, 'ž') WHERE address LIKE _binary CONCAT('%', 0xC5BE, '%'); -- Czech š: Å¡ → š UPDATE principles SET address = REPLACE(address, _binary 0xC5A1, 'š') WHERE address LIKE _binary CONCAT('%', 0xC5A1, '%'); -- Czech ě: Ä› → ě UPDATE principles SET address = REPLACE(address, _binary 0xC49B, 'ě') WHERE address LIKE _binary CONCAT('%', 0xC49B, '%'); -- Czech ň: Å → ň UPDATE principles SET address = REPLACE(address, _binary 0xC588, 'ň') WHERE address LIKE _binary CONCAT('%', 0xC588, '%'); -- Czech ř: Å™ → ř UPDATE principles SET address = REPLACE(address, _binary 0xC599, 'ř') WHERE address LIKE _binary CONCAT('%', 0xC599, '%'); -- Specific principle fixes UPDATE principles SET address = 'Heinz-Fangman-Straße 18' WHERE id = 2; UPDATE principles SET address = 'Bezručova 2901\n756 61 Rožnov pod Radhoštěm', city = 'Rožnov pod Radhoštěm' WHERE id = 9; UPDATE principles SET name = 'IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH' WHERE id = 13; UPDATE principles SET address = 'Liebigstraße 2' WHERE id = 14; UPDATE principles SET name = 'Nosia S.r.l.- Señalización Industrial' WHERE id = 58; UPDATE principles SET address = 'Alte Emser Straße 32' WHERE id = 65; -- ============================================================================ -- PART 3: Fix ADDRESSES table -- ============================================================================ -- En dash: â€" → – UPDATE addresses SET address = REPLACE(address, _binary 0xE28093, '–') WHERE address LIKE _binary CONCAT('%', 0xE28093, '%'); -- Accented é: é → é UPDATE addresses SET address = REPLACE(address, _binary 0xC3A9, 'é') WHERE address LIKE _binary CONCAT('%', 0xC3A9, '%'); UPDATE addresses SET city = REPLACE(city, _binary 0xC3A9, 'é') WHERE city LIKE _binary CONCAT('%', 0xC3A9, '%'); -- Specific address fix UPDATE addresses SET address = 'Lvl 3, Building B, 7–11 Talavera Road' WHERE id = 19; -- ============================================================================ -- PART 4: Fix PRODUCTS table (using simple text for known values) -- ============================================================================ -- Degree symbol: ° → ° UPDATE products SET title = REPLACE(title, CHAR(194,176), CHAR(176)) WHERE title LIKE CONCAT('%', CHAR(194,176), '%'); UPDATE products SET description = REPLACE(description, CHAR(194,176), CHAR(176)) WHERE description LIKE CONCAT('%', CHAR(194,176), '%'); UPDATE products SET item_description = REPLACE(item_description, CHAR(194,176), CHAR(176)) WHERE item_description LIKE CONCAT('%', CHAR(194,176), '%'); -- Registered trademark: ® → ® UPDATE products SET title = REPLACE(title, CHAR(194,174), CHAR(174)) WHERE title LIKE CONCAT('%', CHAR(194,174), '%'); UPDATE products SET description = REPLACE(description, CHAR(194,174), CHAR(174)) WHERE description LIKE CONCAT('%', CHAR(194,174), '%'); UPDATE products SET item_description = REPLACE(item_description, CHAR(194,174), CHAR(174)) WHERE item_description LIKE CONCAT('%', CHAR(194,174), '%'); -- Trademark: â„¢ → ™ (0xE284A2 → 0x2122) UPDATE products SET title = REPLACE(title, CHAR(226,132,162), CHAR(226,132,162)) WHERE title LIKE CONCAT('%', CHAR(226,132,162), '%'); UPDATE products SET description = REPLACE(description, CHAR(226,132,162), CHAR(226,132,162)) WHERE description LIKE CONCAT('%', CHAR(226,132,162), '%'); -- Smart apostrophe ’ → ' UPDATE products SET title = REPLACE(title, CHAR(226,128,153), '''') WHERE title LIKE CONCAT('%', CHAR(226,128,153), '%'); UPDATE products SET description = REPLACE(description, CHAR(226,128,153), '''') WHERE description LIKE CONCAT('%', CHAR(226,128,153), '%'); -- En dash â€" → – UPDATE products SET title = REPLACE(title, CHAR(226,128,147), '–') WHERE title LIKE CONCAT('%', CHAR(226,128,147), '%'); UPDATE products SET description = REPLACE(description, CHAR(226,128,147), '–') WHERE description LIKE CONCAT('%', CHAR(226,128,147), '%'); -- HTML entities (safe to use text) UPDATE products SET title = REPLACE(title, '–', '–') WHERE title LIKE '%–%'; UPDATE products SET description = REPLACE(description, '–', '–') WHERE description LIKE '%–%'; UPDATE products SET title = REPLACE(title, '”', '"') WHERE title LIKE '%”%'; UPDATE products SET description = REPLACE(description, '”', '"') WHERE description LIKE '%”%'; UPDATE products SET title = REPLACE(title, '“', '"') WHERE title LIKE '%“%'; UPDATE products SET description = REPLACE(description, '“', '"') WHERE description LIKE '%“%'; UPDATE products SET title = REPLACE(title, '&', '&') WHERE title LIKE '%&%'; UPDATE products SET description = REPLACE(description, '&', '&') WHERE description LIKE '%&%'; UPDATE products SET title = REPLACE(title, '½', '½') WHERE title LIKE '%½%'; UPDATE products SET description = REPLACE(description, '½', '½') WHERE description LIKE '%½%'; UPDATE products SET title = REPLACE(title, '°', '°') WHERE title LIKE '%°%'; UPDATE products SET description = REPLACE(description, '°', '°') WHERE description LIKE '%°%'; UPDATE products SET title = REPLACE(title, ' ', ' ') WHERE title LIKE '% %'; UPDATE products SET description = REPLACE(description, ' ', ' ') WHERE description LIKE '% %'; -- Specific product fixes UPDATE products SET title = 'C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. With CINCH connector, 1m Cable' WHERE id = 30; UPDATE products SET title = 'Mid-West Instrument Model 240 – SC – 02 – O(TT)' WHERE id = 38; UPDATE products SET title = 'Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp' WHERE id = 67; UPDATE products SET title = 'Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable' WHERE id = 85; -- ============================================================================ -- Verification -- ============================================================================ SELECT '===========================================================' as ''; SELECT 'FIX COMPLETE - Verification Results' as ''; SELECT '===========================================================' as ''; SELECT 'Remaining corrupted principles:' as '', COUNT(*) as count FROM principles WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]' OR city REGEXP '[^ -~]'; SELECT 'Remaining corrupted customers:' as '', COUNT(*) as count FROM customers WHERE name REGEXP '[^ -~]'; SELECT 'Remaining corrupted addresses:' as '', COUNT(*) as count FROM addresses WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]'; SELECT 'Remaining corrupted products:' as '', COUNT(*) as count FROM products WHERE title REGEXP '[^ -~]' OR description REGEXP '[^ -~]' OR item_description REGEXP '[^ -~]'; SELECT 'Fix script completed!' as status;