2025-12-09 12:23:22 -08:00
-- ============================================================================
-- Fix Corrupted Character Data in CMC Database
-- ============================================================================
-- This script fixes mojibake (corrupted UTF-8 characters) in the database
-- Run after converting tables to utf8mb4
--
-- IMPORTANT: Review this script and test on a backup first!
-- Create backup: docker exec cmc-db mariadb-dump -u root -psecureRootPassword cmc | gzip > backup_before_fix.sql.gz
--
-- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_corrupted_data.sql
-- ============================================================================
-- Create a backup table for principles (optional safety measure)
-- DROP TABLE IF EXISTS principles_backup;
-- CREATE TABLE principles_backup AS SELECT * FROM principles;
SET NAMES utf8mb4 ;
-- ============================================================================
-- PART 1: Global Pattern Fixes Using REPLACE
-- ============================================================================
-- These fix common mojibake patterns across all affected tables
-- ----------------------------------------------------------------------------
-- Fix CUSTOMERS table
-- ----------------------------------------------------------------------------
UPDATE customers SET name = REPLACE ( name , ' ’ ' , ' '' ) WHERE name LIKE ' % â € ™ % ' ; -- Smart apostrophe
UPDATE customers SET name = REPLACE ( name , ' “ ' , ' " ' ) WHERE name LIKE ' %“% ' ; -- Left double quote
UPDATE customers SET name = REPLACE ( name , ' †' , ' " ' ) WHERE name LIKE ' %â€% ' ; -- Right double quote
UPDATE customers SET name = REPLACE ( name , ' â€" ' , ' – ' ) WHERE name LIKE ' %â€"% ' ; -- En dash
UPDATE customers SET name = REPLACE ( name , ' â€" ' , ' — ' ) WHERE name LIKE ' %â€"% ' ; -- Em dash
UPDATE customers SET name = REPLACE ( name , ' ​ ' , ' ' ) WHERE name LIKE ' %​% ' ; -- Zero-width space (remove)
UPDATE customers SET name = REPLACE ( name , '   ' , ' ' ) WHERE name LIKE ' % % ' ; -- Special space
UPDATE customers SET name = REPLACE ( name , ' é ' , ' é ' ) WHERE name LIKE ' %é% ' ; -- é
UPDATE customers SET name = REPLACE ( name , ' É ' , ' É ' ) WHERE name LIKE ' %É% ' ; -- É
UPDATE customers SET name = REPLACE ( name , ' ó ' , ' ó ' ) WHERE name LIKE ' %ó% ' ; -- ó
UPDATE customers SET name = REPLACE ( name , ' Ã ' , ' í ' ) WHERE name LIKE ' %Ã% ' ; -- í
UPDATE customers SET name = REPLACE ( name , ' ç ' , ' ç ' ) WHERE name LIKE ' %ç% ' ; -- ç
UPDATE customers SET name = REPLACE ( name , ' ü ' , ' ü ' ) WHERE name LIKE ' %ü% ' ; -- ü
UPDATE customers SET name = REPLACE ( name , ' á ' , ' á ' ) WHERE name LIKE ' %á% ' ; -- á
UPDATE customers SET name = REPLACE ( name , ' ñ ' , ' ñ ' ) WHERE name LIKE ' %ñ% ' ; -- ñ
UPDATE customers SET name = REPLACE ( name , ' ö ' , ' ö ' ) WHERE name LIKE ' %ö% ' ; -- ö
UPDATE customers SET name = REPLACE ( name , ' ô ' , ' ô ' ) WHERE name LIKE ' %ô% ' ; -- ô
UPDATE customers SET name = REPLACE ( name , ' İ ' , ' İ ' ) WHERE name LIKE ' %İ% ' ; -- Turkish İ
UPDATE customers SET name = REPLACE ( name , ' Å ' , ' Ş ' ) WHERE name LIKE ' %Å% ' ; -- Turkish Ş
UPDATE customers SET name = REPLACE ( name , ' Ä ' , ' Ğ ' ) WHERE name LIKE ' %Ä% ' ; -- Turkish Ğ
UPDATE customers SET name = REPLACE ( name , ' Ä ' , ' Č ' ) WHERE name LIKE ' %Ä% ' ; -- Czech Č
UPDATE customers SET name = REPLACE ( name , ' Ä› ' , ' ě ' ) WHERE name LIKE ' %Ä›% ' ; -- Czech ě
-- Remove trailing tabs
UPDATE customers SET name = TRIM ( TRAILING ' \t ' FROM name ) WHERE name LIKE ' %\t ' ;
UPDATE customers SET trading_name = TRIM ( TRAILING ' \t ' FROM trading_name ) WHERE trading_name LIKE ' %\t ' ;
-- ----------------------------------------------------------------------------
-- Fix PRINCIPLES table
-- ----------------------------------------------------------------------------
UPDATE principles SET name = REPLACE ( name , ' ü ' , ' ü ' ) WHERE name LIKE ' %ü% ' ;
UPDATE principles SET name = REPLACE ( name , ' é ' , ' é ' ) WHERE name LIKE ' %é% ' ;
UPDATE principles SET name = REPLACE ( name , ' ó ' , ' ó ' ) WHERE name LIKE ' %ó% ' ;
UPDATE principles SET address = REPLACE ( address , ' ß ' , ' ß ' ) WHERE address LIKE ' %ß% ' ; -- German ß
UPDATE principles SET address = REPLACE ( address , ' ü ' , ' ü ' ) WHERE address LIKE ' %ü% ' ;
UPDATE principles SET address = REPLACE ( address , ' é ' , ' é ' ) WHERE address LIKE ' %é% ' ;
UPDATE principles SET address = REPLACE ( address , ' ñ ' , ' ñ ' ) WHERE address LIKE ' %ñ% ' ;
UPDATE principles SET address = REPLACE ( address , ' Ä ' , ' č ' ) WHERE address LIKE ' %Ä% ' ; -- Czech č
UPDATE principles SET address = REPLACE ( address , ' ž ' , ' ž ' ) WHERE address LIKE ' %ž% ' ; -- Czech ž
UPDATE principles SET address = REPLACE ( address , ' Å¡ ' , ' š ' ) WHERE address LIKE ' %Å¡% ' ; -- Czech š
UPDATE principles SET address = REPLACE ( address , ' Ä› ' , ' ě ' ) WHERE address LIKE ' %Ä›% ' ; -- Czech ě
UPDATE principles SET address = REPLACE ( address , ' Å ' , ' ň ' ) WHERE address LIKE ' %Å% ' ; -- Czech ň
UPDATE principles SET address = REPLACE ( address , ' Å™ ' , ' ř ' ) WHERE address LIKE ' %Å™% ' ; -- Czech ř
UPDATE principles SET city = REPLACE ( city , ' ü ' , ' ü ' ) WHERE city LIKE ' %ü% ' ;
UPDATE principles SET city = REPLACE ( city , ' é ' , ' é ' ) WHERE city LIKE ' %é% ' ;
UPDATE principles SET city = REPLACE ( city , ' ó ' , ' ó ' ) WHERE city LIKE ' %ó% ' ;
-- ----------------------------------------------------------------------------
-- Fix ADDRESSES table
-- ----------------------------------------------------------------------------
UPDATE addresses SET address = REPLACE ( address , ' â€" ' , ' – ' ) WHERE address LIKE ' %â€"% ' ; -- En dash
UPDATE addresses SET address = REPLACE ( address , ' â€" ' , ' — ' ) WHERE address LIKE ' %â€"% ' ; -- Em dash
UPDATE addresses SET address = REPLACE ( address , ' é ' , ' é ' ) WHERE address LIKE ' %é% ' ;
UPDATE addresses SET address = REPLACE ( address , ' ü ' , ' ü ' ) WHERE address LIKE ' %ü% ' ;
UPDATE addresses SET city = REPLACE ( city , ' é ' , ' é ' ) WHERE city LIKE ' %é% ' ;
UPDATE addresses SET city = REPLACE ( city , ' ü ' , ' ü ' ) WHERE city LIKE ' %ü% ' ;
-- ============================================================================
-- PART 2: Specific Record Fixes
-- ============================================================================
-- These fix specific known corrupted records where pattern matching isn't enough
-- ----------------------------------------------------------------------------
-- PRINCIPLES - Specific fixes
-- ----------------------------------------------------------------------------
-- ID 2: German ß
UPDATE principles
SET address = ' Heinz-Fangman-Straße 18 '
WHERE id = 2 ;
-- ID 9: Czech characters (this is complex, needs full correction)
UPDATE principles
SET address = ' Bezručova 2901\n756 61 Rožnov pod Radhoštěm ' ,
city = ' Rožnov pod Radhoštěm '
WHERE id = 9 ;
-- ID 13: German ü
UPDATE principles
SET name = ' IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH '
WHERE id = 13 ;
-- ID 14: German ß
UPDATE principles
SET address = ' Liebigstraße 2 '
WHERE id = 14 ;
-- ID 58: Spanish ó
UPDATE principles
SET name = ' Nosia S.r.l.- Señalización Industrial '
WHERE id = 58 ;
-- ID 65: German ß
UPDATE principles
SET address = ' Alte Emser Straße 32 '
WHERE id = 65 ;
-- ----------------------------------------------------------------------------
-- CUSTOMERS - Specific fixes
-- ----------------------------------------------------------------------------
-- ID 253: É
UPDATE customers
SET name = ' SOLUZÉ CIVIL ENGINEERS Trading Under SOLUZE PTY LTD '
WHERE id = 253 ;
-- ID 1006: Smart apostrophe
UPDATE customers
SET name = ' Dr. Prem '' s Molecules Private Limited (DPMolecules) '
WHERE id = 1006 ;
-- ID 1387: Smart quotes
UPDATE customers
SET name = ' DEE ENTERPRISES (QLD) PTY LTD trading as "MEKLEK" '
WHERE id = 1387 ;
-- ID 1608: Smart apostrophe
UPDATE customers
SET name = ' Guidera O '' Connor Pty Ltd '
WHERE id = 1608 ;
-- ID 2174: Zero-width space
UPDATE customers
SET name = ' Ingredion ANZ Pty Ltd '
WHERE id = 2174 ;
-- ID 2215: French é
UPDATE customers
SET name = ' Vale Nouvelle-Calédonie S.A.S '
WHERE id = 2215 ;
-- ID 2375: Spanish ó
UPDATE customers
SET name = ' Evaluación y Control Ambiental S.A.S. '
WHERE id = 2375 ;
-- ID 3143: Portuguese ç and ó
UPDATE customers
SET name = ' Zontahevy Comércio e Serviços Offshore Ltda '
WHERE id = 3143 ;
-- ID 3529: French é
UPDATE customers
SET name = ' Société des Mines de Syama '
WHERE id = 3529 ;
-- ID 3633: Special space (em space)
UPDATE customers
SET name = ' P.J. Berriman & Co PTY LTD '
WHERE id = 3633 ;
-- ID 4325: Danish ø
UPDATE customers
SET name = ' Rambøll Danmark '
WHERE id = 4325 ;
-- ID 4350: Turkish characters İ, Ş, Ğ
UPDATE customers
SET name = ' SONNIVA ENERGY MAKİNA İNŞAAT İTH. İHR. LTD. ŞTİ. '
WHERE id = 4350 ;
-- ID 4669: French é
UPDATE customers
SET name = ' F.C.C. Fluides Conseils Calédonie SARL '
WHERE id = 4669 ;
-- ID 4743: Czech ě
UPDATE customers
SET name = ' DGPack Prostějov '
WHERE id = 4743 ;
-- ID 4764: Zero-width space
UPDATE customers
SET name = ' Mccready Welding Pty Ltd Trading under the entity Mccready '' s Welding Services '
WHERE id = 4764 ;
-- ID 4893: Spanish í
UPDATE customers
SET name = ' Oxiquímica, S.A.P.I. de C.V. '
WHERE id = 4893 ;
-- ----------------------------------------------------------------------------
-- ADDRESSES - Specific fixes
-- ----------------------------------------------------------------------------
-- ID 19: En dash (7– 11)
UPDATE addresses
SET address = ' Lvl 3, Building B, 7– 11 Talavera Road '
WHERE id = 19 ;
-- ============================================================================
2025-12-22 20:54:26 -08:00
-- PART 3: Fix PRODUCTS Table (12,536+ corrupted records)
-- ============================================================================
-- Products table has extensive corruption from mojibake and HTML entities
-- Degree symbol: ° → °
UPDATE products SET title = REPLACE ( title , ' ° ' , ' ° ' ) WHERE title LIKE ' %°% ' ;
UPDATE products SET description = REPLACE ( description , ' ° ' , ' ° ' ) WHERE description LIKE ' %°% ' ;
UPDATE products SET item_description = REPLACE ( item_description , ' ° ' , ' ° ' ) WHERE item_description LIKE ' %°% ' ;
-- Registered trademark: ® → ®
UPDATE products SET title = REPLACE ( title , ' ® ' , ' ® ' ) WHERE title LIKE ' %®% ' ;
UPDATE products SET description = REPLACE ( description , ' ® ' , ' ® ' ) WHERE description LIKE ' %®% ' ;
UPDATE products SET item_description = REPLACE ( item_description , ' ® ' , ' ® ' ) WHERE item_description LIKE ' %®% ' ;
-- Trademark: â„¢ → ™
UPDATE products SET title = REPLACE ( title , ' â„¢ ' , ' ™ ' ) WHERE title LIKE ' %â„¢% ' ;
UPDATE products SET description = REPLACE ( description , ' â„¢ ' , ' ™ ' ) WHERE description LIKE ' %â„¢% ' ;
UPDATE products SET item_description = REPLACE ( item_description , ' â„¢ ' , ' ™ ' ) WHERE item_description LIKE ' %â„¢% ' ;
-- Smart quotes and dashes in products
UPDATE products SET title = REPLACE ( title , ' ’ ' , ' '' ) WHERE title LIKE ' % â € ™ % ' ;
UPDATE products SET description = REPLACE ( description , ' ’ ' , ' '' ) WHERE description LIKE ' % â € ™ % ' ;
UPDATE products SET title = REPLACE ( title , ' “ ' , ' " ' ) WHERE title LIKE ' %“% ' ;
UPDATE products SET description = REPLACE ( description , ' “ ' , ' " ' ) WHERE description LIKE ' %“% ' ;
UPDATE products SET title = REPLACE ( title , ' †' , ' " ' ) WHERE title LIKE ' %â€% ' ;
UPDATE products SET description = REPLACE ( description , ' †' , ' " ' ) WHERE description LIKE ' %â€% ' ;
UPDATE products SET title = REPLACE ( title , ' â€" ' , ' – ' ) WHERE title LIKE ' %â€"% ' ;
UPDATE products SET description = REPLACE ( description , ' â€" ' , ' – ' ) WHERE description LIKE ' %â€"% ' ;
UPDATE products SET title = REPLACE ( title , ' â€" ' , ' — ' ) WHERE title LIKE ' %â€"% ' ;
UPDATE products SET description = REPLACE ( description , ' â€" ' , ' — ' ) WHERE description LIKE ' %â€"% ' ;
-- Accented characters in products
UPDATE products SET title = REPLACE ( title , ' é ' , ' é ' ) WHERE title LIKE ' %é% ' ;
UPDATE products SET description = REPLACE ( description , ' é ' , ' é ' ) WHERE description LIKE ' %é% ' ;
UPDATE products SET title = REPLACE ( title , ' ü ' , ' ü ' ) WHERE title LIKE ' %ü% ' ;
UPDATE products SET description = REPLACE ( description , ' ü ' , ' ü ' ) WHERE description LIKE ' %ü% ' ;
-- HTML entities (shouldn't be in database)
UPDATE products SET title = REPLACE ( title , ' – ' , ' – ' ) WHERE title LIKE ' %–% ' ;
UPDATE products SET description = REPLACE ( description , ' – ' , ' – ' ) WHERE description LIKE ' %–% ' ;
UPDATE products SET title = REPLACE ( title , ' ” ' , ' " ' ) WHERE title LIKE ' %”% ' ;
UPDATE products SET description = REPLACE ( description , ' ” ' , ' " ' ) WHERE description LIKE ' %”% ' ;
UPDATE products SET title = REPLACE ( title , ' “ ' , ' " ' ) WHERE title LIKE ' %“% ' ;
UPDATE products SET description = REPLACE ( description , ' “ ' , ' " ' ) WHERE description LIKE ' %“% ' ;
UPDATE products SET title = REPLACE ( title , ' & ' , ' & ' ) WHERE title LIKE ' %&% ' ;
UPDATE products SET description = REPLACE ( description , ' & ' , ' & ' ) WHERE description LIKE ' %&% ' ;
UPDATE products SET title = REPLACE ( title , ' ½ ' , ' ½ ' ) WHERE title LIKE ' %½% ' ;
UPDATE products SET description = REPLACE ( description , ' ½ ' , ' ½ ' ) WHERE description LIKE ' %½% ' ;
UPDATE products SET title = REPLACE ( title , ' ° ' , ' ° ' ) WHERE title LIKE ' %°% ' ;
UPDATE products SET description = REPLACE ( description , ' ° ' , ' ° ' ) WHERE description LIKE ' %°% ' ;
UPDATE products SET title = REPLACE ( title , ' ' , ' ' ) WHERE title LIKE ' % % ' ;
UPDATE products SET description = REPLACE ( description , ' ' , ' ' ) WHERE description LIKE ' % % ' ;
-- Specific product fixes
UPDATE products SET title = ' C95SN189C DSTGL40/C-Digital temperature probe for P8xx1 Web Sensor. Range -30 to +80°C. With CINCH connector, 1m Cable ' WHERE id = 30 ;
UPDATE products SET title = ' Mid-West Instrument Model 240 – SC – 02 – O(TT) ' WHERE id = 38 ;
UPDATE products SET title = ' Newson Gale Earth-Rite® II FIBC Static Earthing System, GRP Enclosure with 5m 2 Core Spiral Cable and FIBC Clamp ' WHERE id = 67 ;
UPDATE products SET title = ' Newson Gale Earth-Rite® RTR™ Tri-Mode Static Grounding System, Metal Enclosure, X90-IP Heavy Duty Clamp with 10m 2 Core Spiral Cable ' WHERE id = 85 ;
-- ============================================================================
-- PART 4: Verification Queries
2025-12-09 12:23:22 -08:00
-- ============================================================================
-- Run these after the fixes to verify they worked
-- Check fixed principles
SELECT id , name , city FROM principles WHERE id IN ( 2 , 9 , 13 , 14 , 58 , 65 ) ;
-- Check fixed customers
SELECT id , name FROM customers WHERE id IN ( 253 , 1006 , 1387 , 1608 , 2174 , 2215 , 2375 , 3143 , 3529 , 4325 , 4350 , 4669 , 4743 , 4764 , 4893 ) ;
-- Check fixed addresses
SELECT id , address FROM addresses WHERE id = 19 ;
-- Count remaining corrupted records (should be much lower)
SELECT COUNT ( * ) as remaining_corrupted_principles
FROM principles
WHERE name REGEXP ' [^ -~] ' OR address REGEXP ' [^ -~] ' OR city REGEXP ' [^ -~] ' ;
SELECT COUNT ( * ) as remaining_corrupted_customers
FROM customers
WHERE name REGEXP ' [^ -~] ' ;
SELECT COUNT ( * ) as remaining_corrupted_addresses
FROM addresses
WHERE address REGEXP ' [^ -~] ' OR city REGEXP ' [^ -~] ' ;
2025-12-22 20:54:26 -08:00
SELECT COUNT ( * ) as remaining_corrupted_products
FROM products
WHERE title REGEXP ' [^ -~] ' OR description REGEXP ' [^ -~] ' OR item_description REGEXP ' [^ -~] ' ;
-- Check sample fixed products
SELECT id , title FROM products WHERE id IN ( 30 , 38 , 67 , 85 ) ;
2025-12-09 12:23:22 -08:00
-- ============================================================================
-- COMPLETION
-- ============================================================================
SELECT ' Fix script completed! Review the verification queries above. ' as status ;