cmc-sales/scripts/fix_corrupted_data_hex.sql

222 lines
13 KiB
MySQL
Raw Permalink Normal View History

-- ============================================================================
-- Fix Corrupted Character Data in CMC Database (Using HEX encoding)
-- ============================================================================
-- This script fixes mojibake using HEX encoding to avoid UTF-8 interpretation issues
-- Run after converting tables to utf8mb4
--
-- IMPORTANT: Review this script and test on a backup first!
-- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_corrupted_data_hex.sql
-- ============================================================================
SET NAMES utf8mb4;
-- ============================================================================
-- PART 1: Fix CUSTOMERS table using HEX encoding
-- ============================================================================
-- Smart apostrophe: ’ → '
UPDATE customers SET name = REPLACE(name, UNHEX('E28099'), UNHEX('E28099')) WHERE name LIKE CONCAT('%', UNHEX('E28099'), '%');
-- Actually, let's use the simpler binary approach
UPDATE customers SET name = REPLACE(name, _binary 0xE28099, '''') WHERE name LIKE _binary CONCAT('%', 0xE28099, '%');
-- Left double quote: “ → "
UPDATE customers SET name = REPLACE(name, _binary 0xE2809C, '"') WHERE name LIKE _binary CONCAT('%', 0xE2809C, '%');
-- Right double quote: †→ "
UPDATE customers SET name = REPLACE(name, _binary 0xE2809D, '"') WHERE name LIKE _binary CONCAT('%', 0xE2809D, '%');
-- En dash: â€" →
UPDATE customers SET name = REPLACE(name, _binary 0xE28093, '') WHERE name LIKE _binary CONCAT('%', 0xE28093, '%');
-- Em dash: â€" → —
UPDATE customers SET name = REPLACE(name, _binary 0xE28094, '') WHERE name LIKE _binary CONCAT('%', 0xE28094, '%');
-- Zero-width space: ​ → (remove)
UPDATE customers SET name = REPLACE(name, _binary 0xE2808B, '') WHERE name LIKE _binary CONCAT('%', 0xE2808B, '%');
-- Em space:   → (regular space)
UPDATE customers SET name = REPLACE(name, _binary 0xE28083, ' ') WHERE name LIKE _binary CONCAT('%', 0xE28083, '%');
-- Accented é: é → é
UPDATE customers SET name = REPLACE(name, _binary 0xC3A9, 'é') WHERE name LIKE _binary CONCAT('%', 0xC3A9, '%');
-- Accented É: É → É
UPDATE customers SET name = REPLACE(name, _binary 0xC389, 'É') WHERE name LIKE _binary CONCAT('%', 0xC389, '%');
-- Accented ó: ó → ó
UPDATE customers SET name = REPLACE(name, _binary 0xC3B3, 'ó') WHERE name LIKE _binary CONCAT('%', 0xC3B3, '%');
-- Accented í: Ã → í
UPDATE customers SET name = REPLACE(name, _binary 0xC3AD, 'í') WHERE name LIKE _binary CONCAT('%', 0xC3AD, '%');
-- Accented ç: ç → ç
UPDATE customers SET name = REPLACE(name, _binary 0xC3A7, 'ç') WHERE name LIKE _binary CONCAT('%', 0xC3A7, '%');
-- Accented ü: ü → ü
UPDATE customers SET name = REPLACE(name, _binary 0xC3BC, 'ü') WHERE name LIKE _binary CONCAT('%', 0xC3BC, '%');
-- Accented á: á → á
UPDATE customers SET name = REPLACE(name, _binary 0xC3A1, 'á') WHERE name LIKE _binary CONCAT('%', 0xC3A1, '%');
-- Accented ñ: ñ → ñ
UPDATE customers SET name = REPLACE(name, _binary 0xC3B1, 'ñ') WHERE name LIKE _binary CONCAT('%', 0xC3B1, '%');
-- Accented ö: ö → ö
UPDATE customers SET name = REPLACE(name, _binary 0xC3B6, 'ö') WHERE name LIKE _binary CONCAT('%', 0xC3B6, '%');
-- Accented ô: ô → ô
UPDATE customers SET name = REPLACE(name, _binary 0xC3B4, 'ô') WHERE name LIKE _binary CONCAT('%', 0xC3B4, '%');
-- Turkish İ: İ → İ
UPDATE customers SET name = REPLACE(name, _binary 0xC4B0, 'İ') WHERE name LIKE _binary CONCAT('%', 0xC4B0, '%');
-- Turkish Ş: Å → Ş
UPDATE customers SET name = REPLACE(name, _binary 0xC59E, 'Ş') WHERE name LIKE _binary CONCAT('%', 0xC59E, '%');
-- Turkish Ğ: Ä → Ğ
UPDATE customers SET name = REPLACE(name, _binary 0xC49E, 'Ğ') WHERE name LIKE _binary CONCAT('%', 0xC49E, '%');
-- Czech Č: Ä → Č
UPDATE customers SET name = REPLACE(name, _binary 0xC48C, 'Č') WHERE name LIKE _binary CONCAT('%', 0xC48C, '%');
-- Czech ě: Ä› → ě
UPDATE customers SET name = REPLACE(name, _binary 0xC49B, 'ě') WHERE name LIKE _binary CONCAT('%', 0xC49B, '%');
-- Remove trailing tabs
UPDATE customers SET name = TRIM(TRAILING '\t' FROM name) WHERE name LIKE '%\t';
UPDATE customers SET trading_name = TRIM(TRAILING '\t' FROM trading_name) WHERE trading_name LIKE '%\t';
-- ============================================================================
-- PART 2: Fix PRINCIPLES table
-- ============================================================================
-- Accented ü: ü → ü
UPDATE principles SET name = REPLACE(name, _binary 0xC3BC, 'ü') WHERE name LIKE _binary CONCAT('%', 0xC3BC, '%');
UPDATE principles SET address = REPLACE(address, _binary 0xC3BC, 'ü') WHERE address LIKE _binary CONCAT('%', 0xC3BC, '%');
UPDATE principles SET city = REPLACE(city, _binary 0xC3BC, 'ü') WHERE city LIKE _binary CONCAT('%', 0xC3BC, '%');
-- Accented é: é → é
UPDATE principles SET name = REPLACE(name, _binary 0xC3A9, 'é') WHERE name LIKE _binary CONCAT('%', 0xC3A9, '%');
UPDATE principles SET address = REPLACE(address, _binary 0xC3A9, 'é') WHERE address LIKE _binary CONCAT('%', 0xC3A9, '%');
UPDATE principles SET city = REPLACE(city, _binary 0xC3A9, 'é') WHERE city LIKE _binary CONCAT('%', 0xC3A9, '%');
-- Accented ó: ó → ó
UPDATE principles SET name = REPLACE(name, _binary 0xC3B3, 'ó') WHERE name LIKE _binary CONCAT('%', 0xC3B3, '%');
UPDATE principles SET city = REPLACE(city, _binary 0xC3B3, 'ó') WHERE city LIKE _binary CONCAT('%', 0xC3B3, '%');
-- German ß: ß → ß
UPDATE principles SET address = REPLACE(address, _binary 0xC39F, 'ß') WHERE address LIKE _binary CONCAT('%', 0xC39F, '%');
-- Accented ñ: ñ → ñ
UPDATE principles SET address = REPLACE(address, _binary 0xC3B1, 'ñ') WHERE address LIKE _binary CONCAT('%', 0xC3B1, '%');
-- Czech č: Ä → č
UPDATE principles SET address = REPLACE(address, _binary 0xC48D, 'č') WHERE address LIKE _binary CONCAT('%', 0xC48D, '%');
-- Czech ž: ž → ž
UPDATE principles SET address = REPLACE(address, _binary 0xC5BE, 'ž') WHERE address LIKE _binary CONCAT('%', 0xC5BE, '%');
-- Czech š: Å¡ → š
UPDATE principles SET address = REPLACE(address, _binary 0xC5A1, 'š') WHERE address LIKE _binary CONCAT('%', 0xC5A1, '%');
-- Czech ě: Ä› → ě
UPDATE principles SET address = REPLACE(address, _binary 0xC49B, 'ě') WHERE address LIKE _binary CONCAT('%', 0xC49B, '%');
-- Czech ň: Å → ň
UPDATE principles SET address = REPLACE(address, _binary 0xC588, 'ň') WHERE address LIKE _binary CONCAT('%', 0xC588, '%');
-- Czech ř: Å™ → ř
UPDATE principles SET address = REPLACE(address, _binary 0xC599, 'ř') WHERE address LIKE _binary CONCAT('%', 0xC599, '%');
-- Specific principle fixes
UPDATE principles SET address = 'Heinz-Fangman-Straße 18' WHERE id = 2;
UPDATE principles SET address = 'Bezručova 2901\n756 61 Rožnov pod Radhoštěm', city = 'Rožnov pod Radhoštěm' WHERE id = 9;
UPDATE principles SET name = 'IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH' WHERE id = 13;
UPDATE principles SET address = 'Liebigstraße 2' WHERE id = 14;
UPDATE principles SET name = 'Nosia S.r.l.- Señalización Industrial' WHERE id = 58;
UPDATE principles SET address = 'Alte Emser Straße 32' WHERE id = 65;
-- ============================================================================
-- PART 3: Fix ADDRESSES table
-- ============================================================================
-- En dash: â€" →
UPDATE addresses SET address = REPLACE(address, _binary 0xE28093, '') WHERE address LIKE _binary CONCAT('%', 0xE28093, '%');
-- Accented é: é → é
UPDATE addresses SET address = REPLACE(address, _binary 0xC3A9, 'é') WHERE address LIKE _binary CONCAT('%', 0xC3A9, '%');
UPDATE addresses SET city = REPLACE(city, _binary 0xC3A9, 'é') WHERE city LIKE _binary CONCAT('%', 0xC3A9, '%');
-- Specific address fix
UPDATE addresses SET address = 'Lvl 3, Building B, 711 Talavera Road' WHERE id = 19;
-- ============================================================================
-- PART 4: Fix PRODUCTS table (using simple text for known values)
-- ============================================================================
-- Degree symbol: ° → °
UPDATE products SET title = REPLACE(title, CHAR(194,176), CHAR(176)) WHERE title LIKE CONCAT('%', CHAR(194,176), '%');
UPDATE products SET description = REPLACE(description, CHAR(194,176), CHAR(176)) WHERE description LIKE CONCAT('%', CHAR(194,176), '%');
UPDATE products SET item_description = REPLACE(item_description, CHAR(194,176), CHAR(176)) WHERE item_description LIKE CONCAT('%', CHAR(194,176), '%');
-- Registered trademark: ® → ®
UPDATE products SET title = REPLACE(title, CHAR(194,174), CHAR(174)) WHERE title LIKE CONCAT('%', CHAR(194,174), '%');
UPDATE products SET description = REPLACE(description, CHAR(194,174), CHAR(174)) WHERE description LIKE CONCAT('%', CHAR(194,174), '%');
UPDATE products SET item_description = REPLACE(item_description, CHAR(194,174), CHAR(174)) WHERE item_description LIKE CONCAT('%', CHAR(194,174), '%');
-- Trademark: â„¢ → ™ (0xE284A2 → 0x2122)
UPDATE products SET title = REPLACE(title, CHAR(226,132,162), CHAR(226,132,162)) WHERE title LIKE CONCAT('%', CHAR(226,132,162), '%');
UPDATE products SET description = REPLACE(description, CHAR(226,132,162), CHAR(226,132,162)) WHERE description LIKE CONCAT('%', CHAR(226,132,162), '%');
-- Smart apostrophe ’ → '
UPDATE products SET title = REPLACE(title, CHAR(226,128,153), '''') WHERE title LIKE CONCAT('%', CHAR(226,128,153), '%');
UPDATE products SET description = REPLACE(description, CHAR(226,128,153), '''') WHERE description LIKE CONCAT('%', CHAR(226,128,153), '%');
-- En dash â€" →
UPDATE products SET title = REPLACE(title, CHAR(226,128,147), '') WHERE title LIKE CONCAT('%', CHAR(226,128,147), '%');
UPDATE products SET description = REPLACE(description, CHAR(226,128,147), '') WHERE description LIKE CONCAT('%', CHAR(226,128,147), '%');
-- HTML entities (safe to use text)
UPDATE products SET title = REPLACE(title, '&ndash;', '') WHERE title LIKE '%&ndash;%';
UPDATE products SET description = REPLACE(description, '&ndash;', '') WHERE description LIKE '%&ndash;%';
UPDATE products SET title = REPLACE(title, '&rdquo;', '"') WHERE title LIKE '%&rdquo;%';
UPDATE products SET description = REPLACE(description, '&rdquo;', '"') WHERE description LIKE '%&rdquo;%';
UPDATE products SET title = REPLACE(title, '&ldquo;', '"') WHERE title LIKE '%&ldquo;%';
UPDATE products SET description = REPLACE(description, '&ldquo;', '"') WHERE description LIKE '%&ldquo;%';
UPDATE products SET title = REPLACE(title, '&amp;', '&') WHERE title LIKE '%&amp;%';
UPDATE products SET description = REPLACE(description, '&amp;', '&') WHERE description LIKE '%&amp;%';
UPDATE products SET title = REPLACE(title, '&frac12;', '½') WHERE title LIKE '%&frac12;%';
UPDATE products SET description = REPLACE(description, '&frac12;', '½') WHERE description LIKE '%&frac12;%';
UPDATE products SET title = REPLACE(title, '&deg;', '°') WHERE title LIKE '%&deg;%';
UPDATE products SET description = REPLACE(description, '&deg;', '°') WHERE description LIKE '%&deg;%';
UPDATE products SET title = REPLACE(title, '&nbsp;', ' ') WHERE title LIKE '%&nbsp;%';
UPDATE products SET description = REPLACE(description, '&nbsp;', ' ') WHERE description LIKE '%&nbsp;%';
-- Specific product fixes
UPDATE products SET title = 'C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. With CINCH connector, 1m Cable' WHERE id = 30;
UPDATE products SET title = 'Mid-West Instrument Model 240 SC 02 O(TT)' WHERE id = 38;
UPDATE products SET title = 'Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp' WHERE id = 67;
UPDATE products SET title = 'Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable' WHERE id = 85;
-- ============================================================================
-- Verification
-- ============================================================================
SELECT '===========================================================' as '';
SELECT 'FIX COMPLETE - Verification Results' as '';
SELECT '===========================================================' as '';
SELECT 'Remaining corrupted principles:' as '', COUNT(*) as count
FROM principles WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
SELECT 'Remaining corrupted customers:' as '', COUNT(*) as count
FROM customers WHERE name REGEXP '[^ -~]';
SELECT 'Remaining corrupted addresses:' as '', COUNT(*) as count
FROM addresses WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
SELECT 'Remaining corrupted products:' as '', COUNT(*) as count
FROM products WHERE title REGEXP '[^ -~]' OR description REGEXP '[^ -~]' OR item_description REGEXP '[^ -~]';
SELECT 'Fix script completed!' as status;