cmc-sales/scripts/fix_corrupted_data_hex.sql
2025-12-23 15:54:26 +11:00

222 lines
13 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- ============================================================================
-- Fix Corrupted Character Data in CMC Database (Using HEX encoding)
-- ============================================================================
-- This script fixes mojibake using HEX encoding to avoid UTF-8 interpretation issues
-- Run after converting tables to utf8mb4
--
-- IMPORTANT: Review this script and test on a backup first!
-- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_corrupted_data_hex.sql
-- ============================================================================
SET NAMES utf8mb4;
-- ============================================================================
-- PART 1: Fix CUSTOMERS table using HEX encoding
-- ============================================================================
-- Smart apostrophe: ’ → '
UPDATE customers SET name = REPLACE(name, UNHEX('E28099'), UNHEX('E28099')) WHERE name LIKE CONCAT('%', UNHEX('E28099'), '%');
-- Actually, let's use the simpler binary approach
UPDATE customers SET name = REPLACE(name, _binary 0xE28099, '''') WHERE name LIKE _binary CONCAT('%', 0xE28099, '%');
-- Left double quote: “ → "
UPDATE customers SET name = REPLACE(name, _binary 0xE2809C, '"') WHERE name LIKE _binary CONCAT('%', 0xE2809C, '%');
-- Right double quote: †→ "
UPDATE customers SET name = REPLACE(name, _binary 0xE2809D, '"') WHERE name LIKE _binary CONCAT('%', 0xE2809D, '%');
-- En dash: â€" →
UPDATE customers SET name = REPLACE(name, _binary 0xE28093, '') WHERE name LIKE _binary CONCAT('%', 0xE28093, '%');
-- Em dash: â€" → —
UPDATE customers SET name = REPLACE(name, _binary 0xE28094, '') WHERE name LIKE _binary CONCAT('%', 0xE28094, '%');
-- Zero-width space: ​ → (remove)
UPDATE customers SET name = REPLACE(name, _binary 0xE2808B, '') WHERE name LIKE _binary CONCAT('%', 0xE2808B, '%');
-- Em space:   → (regular space)
UPDATE customers SET name = REPLACE(name, _binary 0xE28083, ' ') WHERE name LIKE _binary CONCAT('%', 0xE28083, '%');
-- Accented é: é → é
UPDATE customers SET name = REPLACE(name, _binary 0xC3A9, 'é') WHERE name LIKE _binary CONCAT('%', 0xC3A9, '%');
-- Accented É: É → É
UPDATE customers SET name = REPLACE(name, _binary 0xC389, 'É') WHERE name LIKE _binary CONCAT('%', 0xC389, '%');
-- Accented ó: ó → ó
UPDATE customers SET name = REPLACE(name, _binary 0xC3B3, 'ó') WHERE name LIKE _binary CONCAT('%', 0xC3B3, '%');
-- Accented í: Ã → í
UPDATE customers SET name = REPLACE(name, _binary 0xC3AD, 'í') WHERE name LIKE _binary CONCAT('%', 0xC3AD, '%');
-- Accented ç: ç → ç
UPDATE customers SET name = REPLACE(name, _binary 0xC3A7, 'ç') WHERE name LIKE _binary CONCAT('%', 0xC3A7, '%');
-- Accented ü: ü → ü
UPDATE customers SET name = REPLACE(name, _binary 0xC3BC, 'ü') WHERE name LIKE _binary CONCAT('%', 0xC3BC, '%');
-- Accented á: á → á
UPDATE customers SET name = REPLACE(name, _binary 0xC3A1, 'á') WHERE name LIKE _binary CONCAT('%', 0xC3A1, '%');
-- Accented ñ: ñ → ñ
UPDATE customers SET name = REPLACE(name, _binary 0xC3B1, 'ñ') WHERE name LIKE _binary CONCAT('%', 0xC3B1, '%');
-- Accented ö: ö → ö
UPDATE customers SET name = REPLACE(name, _binary 0xC3B6, 'ö') WHERE name LIKE _binary CONCAT('%', 0xC3B6, '%');
-- Accented ô: ô → ô
UPDATE customers SET name = REPLACE(name, _binary 0xC3B4, 'ô') WHERE name LIKE _binary CONCAT('%', 0xC3B4, '%');
-- Turkish İ: İ → İ
UPDATE customers SET name = REPLACE(name, _binary 0xC4B0, 'İ') WHERE name LIKE _binary CONCAT('%', 0xC4B0, '%');
-- Turkish Ş: Å → Ş
UPDATE customers SET name = REPLACE(name, _binary 0xC59E, 'Ş') WHERE name LIKE _binary CONCAT('%', 0xC59E, '%');
-- Turkish Ğ: Ä → Ğ
UPDATE customers SET name = REPLACE(name, _binary 0xC49E, 'Ğ') WHERE name LIKE _binary CONCAT('%', 0xC49E, '%');
-- Czech Č: Ä → Č
UPDATE customers SET name = REPLACE(name, _binary 0xC48C, 'Č') WHERE name LIKE _binary CONCAT('%', 0xC48C, '%');
-- Czech ě: Ä› → ě
UPDATE customers SET name = REPLACE(name, _binary 0xC49B, 'ě') WHERE name LIKE _binary CONCAT('%', 0xC49B, '%');
-- Remove trailing tabs
UPDATE customers SET name = TRIM(TRAILING '\t' FROM name) WHERE name LIKE '%\t';
UPDATE customers SET trading_name = TRIM(TRAILING '\t' FROM trading_name) WHERE trading_name LIKE '%\t';
-- ============================================================================
-- PART 2: Fix PRINCIPLES table
-- ============================================================================
-- Accented ü: ü → ü
UPDATE principles SET name = REPLACE(name, _binary 0xC3BC, 'ü') WHERE name LIKE _binary CONCAT('%', 0xC3BC, '%');
UPDATE principles SET address = REPLACE(address, _binary 0xC3BC, 'ü') WHERE address LIKE _binary CONCAT('%', 0xC3BC, '%');
UPDATE principles SET city = REPLACE(city, _binary 0xC3BC, 'ü') WHERE city LIKE _binary CONCAT('%', 0xC3BC, '%');
-- Accented é: é → é
UPDATE principles SET name = REPLACE(name, _binary 0xC3A9, 'é') WHERE name LIKE _binary CONCAT('%', 0xC3A9, '%');
UPDATE principles SET address = REPLACE(address, _binary 0xC3A9, 'é') WHERE address LIKE _binary CONCAT('%', 0xC3A9, '%');
UPDATE principles SET city = REPLACE(city, _binary 0xC3A9, 'é') WHERE city LIKE _binary CONCAT('%', 0xC3A9, '%');
-- Accented ó: ó → ó
UPDATE principles SET name = REPLACE(name, _binary 0xC3B3, 'ó') WHERE name LIKE _binary CONCAT('%', 0xC3B3, '%');
UPDATE principles SET city = REPLACE(city, _binary 0xC3B3, 'ó') WHERE city LIKE _binary CONCAT('%', 0xC3B3, '%');
-- German ß: ß → ß
UPDATE principles SET address = REPLACE(address, _binary 0xC39F, 'ß') WHERE address LIKE _binary CONCAT('%', 0xC39F, '%');
-- Accented ñ: ñ → ñ
UPDATE principles SET address = REPLACE(address, _binary 0xC3B1, 'ñ') WHERE address LIKE _binary CONCAT('%', 0xC3B1, '%');
-- Czech č: Ä → č
UPDATE principles SET address = REPLACE(address, _binary 0xC48D, 'č') WHERE address LIKE _binary CONCAT('%', 0xC48D, '%');
-- Czech ž: ž → ž
UPDATE principles SET address = REPLACE(address, _binary 0xC5BE, 'ž') WHERE address LIKE _binary CONCAT('%', 0xC5BE, '%');
-- Czech š: Å¡ → š
UPDATE principles SET address = REPLACE(address, _binary 0xC5A1, 'š') WHERE address LIKE _binary CONCAT('%', 0xC5A1, '%');
-- Czech ě: Ä› → ě
UPDATE principles SET address = REPLACE(address, _binary 0xC49B, 'ě') WHERE address LIKE _binary CONCAT('%', 0xC49B, '%');
-- Czech ň: Å → ň
UPDATE principles SET address = REPLACE(address, _binary 0xC588, 'ň') WHERE address LIKE _binary CONCAT('%', 0xC588, '%');
-- Czech ř: Å™ → ř
UPDATE principles SET address = REPLACE(address, _binary 0xC599, 'ř') WHERE address LIKE _binary CONCAT('%', 0xC599, '%');
-- Specific principle fixes
UPDATE principles SET address = 'Heinz-Fangman-Straße 18' WHERE id = 2;
UPDATE principles SET address = 'Bezručova 2901\n756 61 Rožnov pod Radhoštěm', city = 'Rožnov pod Radhoštěm' WHERE id = 9;
UPDATE principles SET name = 'IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH' WHERE id = 13;
UPDATE principles SET address = 'Liebigstraße 2' WHERE id = 14;
UPDATE principles SET name = 'Nosia S.r.l.- Señalización Industrial' WHERE id = 58;
UPDATE principles SET address = 'Alte Emser Straße 32' WHERE id = 65;
-- ============================================================================
-- PART 3: Fix ADDRESSES table
-- ============================================================================
-- En dash: â€" →
UPDATE addresses SET address = REPLACE(address, _binary 0xE28093, '') WHERE address LIKE _binary CONCAT('%', 0xE28093, '%');
-- Accented é: é → é
UPDATE addresses SET address = REPLACE(address, _binary 0xC3A9, 'é') WHERE address LIKE _binary CONCAT('%', 0xC3A9, '%');
UPDATE addresses SET city = REPLACE(city, _binary 0xC3A9, 'é') WHERE city LIKE _binary CONCAT('%', 0xC3A9, '%');
-- Specific address fix
UPDATE addresses SET address = 'Lvl 3, Building B, 711 Talavera Road' WHERE id = 19;
-- ============================================================================
-- PART 4: Fix PRODUCTS table (using simple text for known values)
-- ============================================================================
-- Degree symbol: ° → °
UPDATE products SET title = REPLACE(title, CHAR(194,176), CHAR(176)) WHERE title LIKE CONCAT('%', CHAR(194,176), '%');
UPDATE products SET description = REPLACE(description, CHAR(194,176), CHAR(176)) WHERE description LIKE CONCAT('%', CHAR(194,176), '%');
UPDATE products SET item_description = REPLACE(item_description, CHAR(194,176), CHAR(176)) WHERE item_description LIKE CONCAT('%', CHAR(194,176), '%');
-- Registered trademark: ® → ®
UPDATE products SET title = REPLACE(title, CHAR(194,174), CHAR(174)) WHERE title LIKE CONCAT('%', CHAR(194,174), '%');
UPDATE products SET description = REPLACE(description, CHAR(194,174), CHAR(174)) WHERE description LIKE CONCAT('%', CHAR(194,174), '%');
UPDATE products SET item_description = REPLACE(item_description, CHAR(194,174), CHAR(174)) WHERE item_description LIKE CONCAT('%', CHAR(194,174), '%');
-- Trademark: â„¢ → ™ (0xE284A2 → 0x2122)
UPDATE products SET title = REPLACE(title, CHAR(226,132,162), CHAR(226,132,162)) WHERE title LIKE CONCAT('%', CHAR(226,132,162), '%');
UPDATE products SET description = REPLACE(description, CHAR(226,132,162), CHAR(226,132,162)) WHERE description LIKE CONCAT('%', CHAR(226,132,162), '%');
-- Smart apostrophe ’ → '
UPDATE products SET title = REPLACE(title, CHAR(226,128,153), '''') WHERE title LIKE CONCAT('%', CHAR(226,128,153), '%');
UPDATE products SET description = REPLACE(description, CHAR(226,128,153), '''') WHERE description LIKE CONCAT('%', CHAR(226,128,153), '%');
-- En dash â€" →
UPDATE products SET title = REPLACE(title, CHAR(226,128,147), '') WHERE title LIKE CONCAT('%', CHAR(226,128,147), '%');
UPDATE products SET description = REPLACE(description, CHAR(226,128,147), '') WHERE description LIKE CONCAT('%', CHAR(226,128,147), '%');
-- HTML entities (safe to use text)
UPDATE products SET title = REPLACE(title, '&ndash;', '') WHERE title LIKE '%&ndash;%';
UPDATE products SET description = REPLACE(description, '&ndash;', '') WHERE description LIKE '%&ndash;%';
UPDATE products SET title = REPLACE(title, '&rdquo;', '"') WHERE title LIKE '%&rdquo;%';
UPDATE products SET description = REPLACE(description, '&rdquo;', '"') WHERE description LIKE '%&rdquo;%';
UPDATE products SET title = REPLACE(title, '&ldquo;', '"') WHERE title LIKE '%&ldquo;%';
UPDATE products SET description = REPLACE(description, '&ldquo;', '"') WHERE description LIKE '%&ldquo;%';
UPDATE products SET title = REPLACE(title, '&amp;', '&') WHERE title LIKE '%&amp;%';
UPDATE products SET description = REPLACE(description, '&amp;', '&') WHERE description LIKE '%&amp;%';
UPDATE products SET title = REPLACE(title, '&frac12;', '½') WHERE title LIKE '%&frac12;%';
UPDATE products SET description = REPLACE(description, '&frac12;', '½') WHERE description LIKE '%&frac12;%';
UPDATE products SET title = REPLACE(title, '&deg;', '°') WHERE title LIKE '%&deg;%';
UPDATE products SET description = REPLACE(description, '&deg;', '°') WHERE description LIKE '%&deg;%';
UPDATE products SET title = REPLACE(title, '&nbsp;', ' ') WHERE title LIKE '%&nbsp;%';
UPDATE products SET description = REPLACE(description, '&nbsp;', ' ') WHERE description LIKE '%&nbsp;%';
-- Specific product fixes
UPDATE products SET title = 'C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. With CINCH connector, 1m Cable' WHERE id = 30;
UPDATE products SET title = 'Mid-West Instrument Model 240 SC 02 O(TT)' WHERE id = 38;
UPDATE products SET title = 'Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp' WHERE id = 67;
UPDATE products SET title = 'Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable' WHERE id = 85;
-- ============================================================================
-- Verification
-- ============================================================================
SELECT '===========================================================' as '';
SELECT 'FIX COMPLETE - Verification Results' as '';
SELECT '===========================================================' as '';
SELECT 'Remaining corrupted principles:' as '', COUNT(*) as count
FROM principles WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
SELECT 'Remaining corrupted customers:' as '', COUNT(*) as count
FROM customers WHERE name REGEXP '[^ -~]';
SELECT 'Remaining corrupted addresses:' as '', COUNT(*) as count
FROM addresses WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
SELECT 'Remaining corrupted products:' as '', COUNT(*) as count
FROM products WHERE title REGEXP '[^ -~]' OR description REGEXP '[^ -~]' OR item_description REGEXP '[^ -~]';
SELECT 'Fix script completed!' as status;