cmc-sales/scripts/fix_corrupted_data.sql

251 lines
11 KiB
MySQL
Raw Normal View History

2025-12-09 12:23:22 -08:00
-- ============================================================================
-- Fix Corrupted Character Data in CMC Database
-- ============================================================================
-- This script fixes mojibake (corrupted UTF-8 characters) in the database
-- Run after converting tables to utf8mb4
--
-- IMPORTANT: Review this script and test on a backup first!
-- Create backup: docker exec cmc-db mariadb-dump -u root -psecureRootPassword cmc | gzip > backup_before_fix.sql.gz
--
-- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_corrupted_data.sql
-- ============================================================================
-- Create a backup table for principles (optional safety measure)
-- DROP TABLE IF EXISTS principles_backup;
-- CREATE TABLE principles_backup AS SELECT * FROM principles;
SET NAMES utf8mb4;
-- ============================================================================
-- PART 1: Global Pattern Fixes Using REPLACE
-- ============================================================================
-- These fix common mojibake patterns across all affected tables
-- ----------------------------------------------------------------------------
-- Fix CUSTOMERS table
-- ----------------------------------------------------------------------------
UPDATE customers SET name = REPLACE(name, '’', ''') WHERE name LIKE '%â%'; -- Smart apostrophe
UPDATE customers SET name = REPLACE(name, '“', '"') WHERE name LIKE '%“%'; -- Left double quote
UPDATE customers SET name = REPLACE(name, 'â€', '"') WHERE name LIKE '%â€%'; -- Right double quote
UPDATE customers SET name = REPLACE(name, 'â€"', '') WHERE name LIKE '%â€"%'; -- En dash
UPDATE customers SET name = REPLACE(name, 'â€"', '') WHERE name LIKE '%â€"%'; -- Em dash
UPDATE customers SET name = REPLACE(name, '​', '') WHERE name LIKE '%​%'; -- Zero-width space (remove)
UPDATE customers SET name = REPLACE(name, ' ', ' ') WHERE name LIKE '% %'; -- Special space
UPDATE customers SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%'; -- é
UPDATE customers SET name = REPLACE(name, 'É', 'É') WHERE name LIKE '%É%'; -- É
UPDATE customers SET name = REPLACE(name, 'ó', 'ó') WHERE name LIKE '%ó%'; -- ó
UPDATE customers SET name = REPLACE(name, 'Ã', 'í') WHERE name LIKE '%Ã%'; -- í
UPDATE customers SET name = REPLACE(name, 'ç', 'ç') WHERE name LIKE '%ç%'; -- ç
UPDATE customers SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%'; -- ü
UPDATE customers SET name = REPLACE(name, 'á', 'á') WHERE name LIKE '%á%'; -- á
UPDATE customers SET name = REPLACE(name, 'ñ', 'ñ') WHERE name LIKE '%ñ%'; -- ñ
UPDATE customers SET name = REPLACE(name, 'ö', 'ö') WHERE name LIKE '%ö%'; -- ö
UPDATE customers SET name = REPLACE(name, 'ô', 'ô') WHERE name LIKE '%ô%'; -- ô
UPDATE customers SET name = REPLACE(name, 'İ', 'İ') WHERE name LIKE '%İ%'; -- Turkish İ
UPDATE customers SET name = REPLACE(name, 'Å', 'Ş') WHERE name LIKE '%Å%'; -- Turkish Ş
UPDATE customers SET name = REPLACE(name, 'Ä', 'Ğ') WHERE name LIKE '%Ä%'; -- Turkish Ğ
UPDATE customers SET name = REPLACE(name, 'Ä', 'Č') WHERE name LIKE '%Ä%'; -- Czech Č
UPDATE customers SET name = REPLACE(name, 'Ä›', 'ě') WHERE name LIKE '%Ä›%'; -- Czech ě
-- Remove trailing tabs
UPDATE customers SET name = TRIM(TRAILING '\t' FROM name) WHERE name LIKE '%\t';
UPDATE customers SET trading_name = TRIM(TRAILING '\t' FROM trading_name) WHERE trading_name LIKE '%\t';
-- ----------------------------------------------------------------------------
-- Fix PRINCIPLES table
-- ----------------------------------------------------------------------------
UPDATE principles SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%';
UPDATE principles SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%';
UPDATE principles SET name = REPLACE(name, 'ó', 'ó') WHERE name LIKE '%ó%';
UPDATE principles SET address = REPLACE(address, 'ß', 'ß') WHERE address LIKE '%ß%'; -- German ß
UPDATE principles SET address = REPLACE(address, 'ü', 'ü') WHERE address LIKE '%ü%';
UPDATE principles SET address = REPLACE(address, 'é', 'é') WHERE address LIKE '%é%';
UPDATE principles SET address = REPLACE(address, 'ñ', 'ñ') WHERE address LIKE '%ñ%';
UPDATE principles SET address = REPLACE(address, 'Ä', 'č') WHERE address LIKE '%Ä%'; -- Czech č
UPDATE principles SET address = REPLACE(address, 'ž', 'ž') WHERE address LIKE '%ž%'; -- Czech ž
UPDATE principles SET address = REPLACE(address, 'Å¡', 'š') WHERE address LIKE '%Å¡%'; -- Czech š
UPDATE principles SET address = REPLACE(address, 'Ä›', 'ě') WHERE address LIKE '%Ä›%'; -- Czech ě
UPDATE principles SET address = REPLACE(address, 'Å', 'ň') WHERE address LIKE '%Å%'; -- Czech ň
UPDATE principles SET address = REPLACE(address, 'Å™', 'ř') WHERE address LIKE '%Å™%'; -- Czech ř
UPDATE principles SET city = REPLACE(city, 'ü', 'ü') WHERE city LIKE '%ü%';
UPDATE principles SET city = REPLACE(city, 'é', 'é') WHERE city LIKE '%é%';
UPDATE principles SET city = REPLACE(city, 'ó', 'ó') WHERE city LIKE '%ó%';
-- ----------------------------------------------------------------------------
-- Fix ADDRESSES table
-- ----------------------------------------------------------------------------
UPDATE addresses SET address = REPLACE(address, 'â€"', '') WHERE address LIKE '%â€"%'; -- En dash
UPDATE addresses SET address = REPLACE(address, 'â€"', '') WHERE address LIKE '%â€"%'; -- Em dash
UPDATE addresses SET address = REPLACE(address, 'é', 'é') WHERE address LIKE '%é%';
UPDATE addresses SET address = REPLACE(address, 'ü', 'ü') WHERE address LIKE '%ü%';
UPDATE addresses SET city = REPLACE(city, 'é', 'é') WHERE city LIKE '%é%';
UPDATE addresses SET city = REPLACE(city, 'ü', 'ü') WHERE city LIKE '%ü%';
-- ============================================================================
-- PART 2: Specific Record Fixes
-- ============================================================================
-- These fix specific known corrupted records where pattern matching isn't enough
-- ----------------------------------------------------------------------------
-- PRINCIPLES - Specific fixes
-- ----------------------------------------------------------------------------
-- ID 2: German ß
UPDATE principles
SET address = 'Heinz-Fangman-Straße 18'
WHERE id = 2;
-- ID 9: Czech characters (this is complex, needs full correction)
UPDATE principles
SET address = 'Bezručova 2901\n756 61 Rožnov pod Radhoštěm',
city = 'Rožnov pod Radhoštěm'
WHERE id = 9;
-- ID 13: German ü
UPDATE principles
SET name = 'IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH'
WHERE id = 13;
-- ID 14: German ß
UPDATE principles
SET address = 'Liebigstraße 2'
WHERE id = 14;
-- ID 58: Spanish ó
UPDATE principles
SET name = 'Nosia S.r.l.- Señalización Industrial'
WHERE id = 58;
-- ID 65: German ß
UPDATE principles
SET address = 'Alte Emser Straße 32'
WHERE id = 65;
-- ----------------------------------------------------------------------------
-- CUSTOMERS - Specific fixes
-- ----------------------------------------------------------------------------
-- ID 253: É
UPDATE customers
SET name = 'SOLUZÉ CIVIL ENGINEERS Trading Under SOLUZE PTY LTD'
WHERE id = 253;
-- ID 1006: Smart apostrophe
UPDATE customers
SET name = 'Dr. Prem''s Molecules Private Limited (DPMolecules)'
WHERE id = 1006;
-- ID 1387: Smart quotes
UPDATE customers
SET name = 'DEE ENTERPRISES (QLD) PTY LTD trading as "MEKLEK"'
WHERE id = 1387;
-- ID 1608: Smart apostrophe
UPDATE customers
SET name = 'Guidera O''Connor Pty Ltd'
WHERE id = 1608;
-- ID 2174: Zero-width space
UPDATE customers
SET name = 'Ingredion ANZ Pty Ltd'
WHERE id = 2174;
-- ID 2215: French é
UPDATE customers
SET name = 'Vale Nouvelle-Calédonie S.A.S'
WHERE id = 2215;
-- ID 2375: Spanish ó
UPDATE customers
SET name = 'Evaluación y Control Ambiental S.A.S.'
WHERE id = 2375;
-- ID 3143: Portuguese ç and ó
UPDATE customers
SET name = 'Zontahevy Comércio e Serviços Offshore Ltda'
WHERE id = 3143;
-- ID 3529: French é
UPDATE customers
SET name = 'Société des Mines de Syama'
WHERE id = 3529;
-- ID 3633: Special space (em space)
UPDATE customers
SET name = 'P.J. Berriman & Co PTY LTD'
WHERE id = 3633;
-- ID 4325: Danish ø
UPDATE customers
SET name = 'Rambøll Danmark'
WHERE id = 4325;
-- ID 4350: Turkish characters İ, Ş, Ğ
UPDATE customers
SET name = 'SONNIVA ENERGY MAKİNA İNŞAAT İTH. İHR. LTD. ŞTİ.'
WHERE id = 4350;
-- ID 4669: French é
UPDATE customers
SET name = 'F.C.C. Fluides Conseils Calédonie SARL'
WHERE id = 4669;
-- ID 4743: Czech ě
UPDATE customers
SET name = 'DGPack Prostějov'
WHERE id = 4743;
-- ID 4764: Zero-width space
UPDATE customers
SET name = 'Mccready Welding Pty Ltd Trading under the entity Mccready''s Welding Services'
WHERE id = 4764;
-- ID 4893: Spanish í
UPDATE customers
SET name = 'Oxiquímica, S.A.P.I. de C.V.'
WHERE id = 4893;
-- ----------------------------------------------------------------------------
-- ADDRESSES - Specific fixes
-- ----------------------------------------------------------------------------
-- ID 19: En dash (711)
UPDATE addresses
SET address = 'Lvl 3, Building B, 711 Talavera Road'
WHERE id = 19;
-- ============================================================================
-- PART 3: Verification Queries
-- ============================================================================
-- Run these after the fixes to verify they worked
-- Check fixed principles
SELECT id, name, city FROM principles WHERE id IN (2, 9, 13, 14, 58, 65);
-- Check fixed customers
SELECT id, name FROM customers WHERE id IN (253, 1006, 1387, 1608, 2174, 2215, 2375, 3143, 3529, 4325, 4350, 4669, 4743, 4764, 4893);
-- Check fixed addresses
SELECT id, address FROM addresses WHERE id = 19;
-- Count remaining corrupted records (should be much lower)
SELECT COUNT(*) as remaining_corrupted_principles
FROM principles
WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
SELECT COUNT(*) as remaining_corrupted_customers
FROM customers
WHERE name REGEXP '[^ -~]';
SELECT COUNT(*) as remaining_corrupted_addresses
FROM addresses
WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
-- ============================================================================
-- COMPLETION
-- ============================================================================
SELECT 'Fix script completed! Review the verification queries above.' as status;