222 lines
13 KiB
SQL
222 lines
13 KiB
SQL
-- ============================================================================
|
||
-- Fix Corrupted Character Data in CMC Database (Using HEX encoding)
|
||
-- ============================================================================
|
||
-- This script fixes mojibake using HEX encoding to avoid UTF-8 interpretation issues
|
||
-- Run after converting tables to utf8mb4
|
||
--
|
||
-- IMPORTANT: Review this script and test on a backup first!
|
||
-- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_corrupted_data_hex.sql
|
||
-- ============================================================================
|
||
|
||
SET NAMES utf8mb4;
|
||
|
||
-- ============================================================================
|
||
-- PART 1: Fix CUSTOMERS table using HEX encoding
|
||
-- ============================================================================
|
||
|
||
-- Smart apostrophe: ’ → '
|
||
UPDATE customers SET name = REPLACE(name, UNHEX('E28099'), UNHEX('E28099')) WHERE name LIKE CONCAT('%', UNHEX('E28099'), '%');
|
||
-- Actually, let's use the simpler binary approach
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xE28099, '''') WHERE name LIKE _binary CONCAT('%', 0xE28099, '%');
|
||
|
||
-- Left double quote: “ → "
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xE2809C, '"') WHERE name LIKE _binary CONCAT('%', 0xE2809C, '%');
|
||
|
||
-- Right double quote: †→ "
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xE2809D, '"') WHERE name LIKE _binary CONCAT('%', 0xE2809D, '%');
|
||
|
||
-- En dash: â€" → –
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xE28093, '–') WHERE name LIKE _binary CONCAT('%', 0xE28093, '%');
|
||
|
||
-- Em dash: â€" → —
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xE28094, '—') WHERE name LIKE _binary CONCAT('%', 0xE28094, '%');
|
||
|
||
-- Zero-width space: ​ → (remove)
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xE2808B, '') WHERE name LIKE _binary CONCAT('%', 0xE2808B, '%');
|
||
|
||
-- Em space:   → (regular space)
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xE28083, ' ') WHERE name LIKE _binary CONCAT('%', 0xE28083, '%');
|
||
|
||
-- Accented é: é → é
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC3A9, 'é') WHERE name LIKE _binary CONCAT('%', 0xC3A9, '%');
|
||
|
||
-- Accented É: É → É
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC389, 'É') WHERE name LIKE _binary CONCAT('%', 0xC389, '%');
|
||
|
||
-- Accented ó: ó → ó
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC3B3, 'ó') WHERE name LIKE _binary CONCAT('%', 0xC3B3, '%');
|
||
|
||
-- Accented í: Ã → í
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC3AD, 'í') WHERE name LIKE _binary CONCAT('%', 0xC3AD, '%');
|
||
|
||
-- Accented ç: ç → ç
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC3A7, 'ç') WHERE name LIKE _binary CONCAT('%', 0xC3A7, '%');
|
||
|
||
-- Accented ü: ü → ü
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC3BC, 'ü') WHERE name LIKE _binary CONCAT('%', 0xC3BC, '%');
|
||
|
||
-- Accented á: á → á
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC3A1, 'á') WHERE name LIKE _binary CONCAT('%', 0xC3A1, '%');
|
||
|
||
-- Accented ñ: ñ → ñ
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC3B1, 'ñ') WHERE name LIKE _binary CONCAT('%', 0xC3B1, '%');
|
||
|
||
-- Accented ö: ö → ö
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC3B6, 'ö') WHERE name LIKE _binary CONCAT('%', 0xC3B6, '%');
|
||
|
||
-- Accented ô: ô → ô
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC3B4, 'ô') WHERE name LIKE _binary CONCAT('%', 0xC3B4, '%');
|
||
|
||
-- Turkish İ: İ → İ
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC4B0, 'İ') WHERE name LIKE _binary CONCAT('%', 0xC4B0, '%');
|
||
|
||
-- Turkish Ş: Å → Ş
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC59E, 'Ş') WHERE name LIKE _binary CONCAT('%', 0xC59E, '%');
|
||
|
||
-- Turkish Ğ: Ä → Ğ
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC49E, 'Ğ') WHERE name LIKE _binary CONCAT('%', 0xC49E, '%');
|
||
|
||
-- Czech Č: Ä → Č
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC48C, 'Č') WHERE name LIKE _binary CONCAT('%', 0xC48C, '%');
|
||
|
||
-- Czech ě: Ä› → ě
|
||
UPDATE customers SET name = REPLACE(name, _binary 0xC49B, 'ě') WHERE name LIKE _binary CONCAT('%', 0xC49B, '%');
|
||
|
||
-- Remove trailing tabs
|
||
UPDATE customers SET name = TRIM(TRAILING '\t' FROM name) WHERE name LIKE '%\t';
|
||
UPDATE customers SET trading_name = TRIM(TRAILING '\t' FROM trading_name) WHERE trading_name LIKE '%\t';
|
||
|
||
-- ============================================================================
|
||
-- PART 2: Fix PRINCIPLES table
|
||
-- ============================================================================
|
||
|
||
-- Accented ü: ü → ü
|
||
UPDATE principles SET name = REPLACE(name, _binary 0xC3BC, 'ü') WHERE name LIKE _binary CONCAT('%', 0xC3BC, '%');
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC3BC, 'ü') WHERE address LIKE _binary CONCAT('%', 0xC3BC, '%');
|
||
UPDATE principles SET city = REPLACE(city, _binary 0xC3BC, 'ü') WHERE city LIKE _binary CONCAT('%', 0xC3BC, '%');
|
||
|
||
-- Accented é: é → é
|
||
UPDATE principles SET name = REPLACE(name, _binary 0xC3A9, 'é') WHERE name LIKE _binary CONCAT('%', 0xC3A9, '%');
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC3A9, 'é') WHERE address LIKE _binary CONCAT('%', 0xC3A9, '%');
|
||
UPDATE principles SET city = REPLACE(city, _binary 0xC3A9, 'é') WHERE city LIKE _binary CONCAT('%', 0xC3A9, '%');
|
||
|
||
-- Accented ó: ó → ó
|
||
UPDATE principles SET name = REPLACE(name, _binary 0xC3B3, 'ó') WHERE name LIKE _binary CONCAT('%', 0xC3B3, '%');
|
||
UPDATE principles SET city = REPLACE(city, _binary 0xC3B3, 'ó') WHERE city LIKE _binary CONCAT('%', 0xC3B3, '%');
|
||
|
||
-- German ß: ß → ß
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC39F, 'ß') WHERE address LIKE _binary CONCAT('%', 0xC39F, '%');
|
||
|
||
-- Accented ñ: ñ → ñ
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC3B1, 'ñ') WHERE address LIKE _binary CONCAT('%', 0xC3B1, '%');
|
||
|
||
-- Czech č: Ä → č
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC48D, 'č') WHERE address LIKE _binary CONCAT('%', 0xC48D, '%');
|
||
|
||
-- Czech ž: ž → ž
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC5BE, 'ž') WHERE address LIKE _binary CONCAT('%', 0xC5BE, '%');
|
||
|
||
-- Czech š: Å¡ → š
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC5A1, 'š') WHERE address LIKE _binary CONCAT('%', 0xC5A1, '%');
|
||
|
||
-- Czech ě: Ä› → ě
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC49B, 'ě') WHERE address LIKE _binary CONCAT('%', 0xC49B, '%');
|
||
|
||
-- Czech ň: Å → ň
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC588, 'ň') WHERE address LIKE _binary CONCAT('%', 0xC588, '%');
|
||
|
||
-- Czech ř: Å™ → ř
|
||
UPDATE principles SET address = REPLACE(address, _binary 0xC599, 'ř') WHERE address LIKE _binary CONCAT('%', 0xC599, '%');
|
||
|
||
-- Specific principle fixes
|
||
UPDATE principles SET address = 'Heinz-Fangman-Straße 18' WHERE id = 2;
|
||
UPDATE principles SET address = 'Bezručova 2901\n756 61 Rožnov pod Radhoštěm', city = 'Rožnov pod Radhoštěm' WHERE id = 9;
|
||
UPDATE principles SET name = 'IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH' WHERE id = 13;
|
||
UPDATE principles SET address = 'Liebigstraße 2' WHERE id = 14;
|
||
UPDATE principles SET name = 'Nosia S.r.l.- Señalización Industrial' WHERE id = 58;
|
||
UPDATE principles SET address = 'Alte Emser Straße 32' WHERE id = 65;
|
||
|
||
-- ============================================================================
|
||
-- PART 3: Fix ADDRESSES table
|
||
-- ============================================================================
|
||
|
||
-- En dash: â€" → –
|
||
UPDATE addresses SET address = REPLACE(address, _binary 0xE28093, '–') WHERE address LIKE _binary CONCAT('%', 0xE28093, '%');
|
||
|
||
-- Accented é: é → é
|
||
UPDATE addresses SET address = REPLACE(address, _binary 0xC3A9, 'é') WHERE address LIKE _binary CONCAT('%', 0xC3A9, '%');
|
||
UPDATE addresses SET city = REPLACE(city, _binary 0xC3A9, 'é') WHERE city LIKE _binary CONCAT('%', 0xC3A9, '%');
|
||
|
||
-- Specific address fix
|
||
UPDATE addresses SET address = 'Lvl 3, Building B, 7–11 Talavera Road' WHERE id = 19;
|
||
|
||
-- ============================================================================
|
||
-- PART 4: Fix PRODUCTS table (using simple text for known values)
|
||
-- ============================================================================
|
||
|
||
-- Degree symbol: ° → °
|
||
UPDATE products SET title = REPLACE(title, CHAR(194,176), CHAR(176)) WHERE title LIKE CONCAT('%', CHAR(194,176), '%');
|
||
UPDATE products SET description = REPLACE(description, CHAR(194,176), CHAR(176)) WHERE description LIKE CONCAT('%', CHAR(194,176), '%');
|
||
UPDATE products SET item_description = REPLACE(item_description, CHAR(194,176), CHAR(176)) WHERE item_description LIKE CONCAT('%', CHAR(194,176), '%');
|
||
|
||
-- Registered trademark: ® → ®
|
||
UPDATE products SET title = REPLACE(title, CHAR(194,174), CHAR(174)) WHERE title LIKE CONCAT('%', CHAR(194,174), '%');
|
||
UPDATE products SET description = REPLACE(description, CHAR(194,174), CHAR(174)) WHERE description LIKE CONCAT('%', CHAR(194,174), '%');
|
||
UPDATE products SET item_description = REPLACE(item_description, CHAR(194,174), CHAR(174)) WHERE item_description LIKE CONCAT('%', CHAR(194,174), '%');
|
||
|
||
-- Trademark: â„¢ → ™ (0xE284A2 → 0x2122)
|
||
UPDATE products SET title = REPLACE(title, CHAR(226,132,162), CHAR(226,132,162)) WHERE title LIKE CONCAT('%', CHAR(226,132,162), '%');
|
||
UPDATE products SET description = REPLACE(description, CHAR(226,132,162), CHAR(226,132,162)) WHERE description LIKE CONCAT('%', CHAR(226,132,162), '%');
|
||
|
||
-- Smart apostrophe ’ → '
|
||
UPDATE products SET title = REPLACE(title, CHAR(226,128,153), '''') WHERE title LIKE CONCAT('%', CHAR(226,128,153), '%');
|
||
UPDATE products SET description = REPLACE(description, CHAR(226,128,153), '''') WHERE description LIKE CONCAT('%', CHAR(226,128,153), '%');
|
||
|
||
-- En dash â€" → –
|
||
UPDATE products SET title = REPLACE(title, CHAR(226,128,147), '–') WHERE title LIKE CONCAT('%', CHAR(226,128,147), '%');
|
||
UPDATE products SET description = REPLACE(description, CHAR(226,128,147), '–') WHERE description LIKE CONCAT('%', CHAR(226,128,147), '%');
|
||
|
||
-- HTML entities (safe to use text)
|
||
UPDATE products SET title = REPLACE(title, '–', '–') WHERE title LIKE '%–%';
|
||
UPDATE products SET description = REPLACE(description, '–', '–') WHERE description LIKE '%–%';
|
||
UPDATE products SET title = REPLACE(title, '”', '"') WHERE title LIKE '%”%';
|
||
UPDATE products SET description = REPLACE(description, '”', '"') WHERE description LIKE '%”%';
|
||
UPDATE products SET title = REPLACE(title, '“', '"') WHERE title LIKE '%“%';
|
||
UPDATE products SET description = REPLACE(description, '“', '"') WHERE description LIKE '%“%';
|
||
UPDATE products SET title = REPLACE(title, '&', '&') WHERE title LIKE '%&%';
|
||
UPDATE products SET description = REPLACE(description, '&', '&') WHERE description LIKE '%&%';
|
||
UPDATE products SET title = REPLACE(title, '½', '½') WHERE title LIKE '%½%';
|
||
UPDATE products SET description = REPLACE(description, '½', '½') WHERE description LIKE '%½%';
|
||
UPDATE products SET title = REPLACE(title, '°', '°') WHERE title LIKE '%°%';
|
||
UPDATE products SET description = REPLACE(description, '°', '°') WHERE description LIKE '%°%';
|
||
UPDATE products SET title = REPLACE(title, ' ', ' ') WHERE title LIKE '% %';
|
||
UPDATE products SET description = REPLACE(description, ' ', ' ') WHERE description LIKE '% %';
|
||
|
||
-- Specific product fixes
|
||
UPDATE products SET title = 'C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. With CINCH connector, 1m Cable' WHERE id = 30;
|
||
UPDATE products SET title = 'Mid-West Instrument Model 240 – SC – 02 – O(TT)' WHERE id = 38;
|
||
UPDATE products SET title = 'Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp' WHERE id = 67;
|
||
UPDATE products SET title = 'Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable' WHERE id = 85;
|
||
|
||
-- ============================================================================
|
||
-- Verification
|
||
-- ============================================================================
|
||
|
||
SELECT '===========================================================' as '';
|
||
SELECT 'FIX COMPLETE - Verification Results' as '';
|
||
SELECT '===========================================================' as '';
|
||
|
||
SELECT 'Remaining corrupted principles:' as '', COUNT(*) as count
|
||
FROM principles WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
|
||
|
||
SELECT 'Remaining corrupted customers:' as '', COUNT(*) as count
|
||
FROM customers WHERE name REGEXP '[^ -~]';
|
||
|
||
SELECT 'Remaining corrupted addresses:' as '', COUNT(*) as count
|
||
FROM addresses WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
|
||
|
||
SELECT 'Remaining corrupted products:' as '', COUNT(*) as count
|
||
FROM products WHERE title REGEXP '[^ -~]' OR description REGEXP '[^ -~]' OR item_description REGEXP '[^ -~]';
|
||
|
||
SELECT 'Fix script completed!' as status;
|