Scripts to fix encoding problems #137
This commit is contained in:
parent
5db8451f2c
commit
bc958b6acf
59
scripts/convert_charset_to_utf8mb4.sql
Normal file
59
scripts/convert_charset_to_utf8mb4.sql
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
ALTER TABLE `acos` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `addresses` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `aros` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `aros_acos` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `boxes` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `cake_sessions` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `contacts` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `contact_categories` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `costings` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `countries` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `currencies` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `customers` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `customer_categories` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `documents` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `doc_pages` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `emails` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `emails_enquiries` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `emails_invoices` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `emails_jobs` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `emails_purchase_orders` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `email_attachments` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `email_recipients` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `enquiries` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `enquiry_email_queues` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `freight_forwarders` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `freight_services` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `groups` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `industries` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `industries_customers` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `invoices` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `jobs` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `jobstatus` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `jobs_purchase_orders` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `job_products` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `line_items` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `order_acknowledgements` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `packing_lists` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `pages` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `principles` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `principle_addresses` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `principle_contacts` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `products` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `product_attachments` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `product_categories` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `product_options` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `product_options_categories` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `purchase_orders` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `quotes` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `quote_pages` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `quote_products` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `shipments` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `shipments_jobs` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `shipments_principles` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `shipments_purchase_orders` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `shipment_categories` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `shipment_invoices` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `states` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `statuses` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
ALTER TABLE `users` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
|
||||
250
scripts/fix_corrupted_data.sql
Normal file
250
scripts/fix_corrupted_data.sql
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
-- ============================================================================
|
||||
-- Fix Corrupted Character Data in CMC Database
|
||||
-- ============================================================================
|
||||
-- This script fixes mojibake (corrupted UTF-8 characters) in the database
|
||||
-- Run after converting tables to utf8mb4
|
||||
--
|
||||
-- IMPORTANT: Review this script and test on a backup first!
|
||||
-- Create backup: docker exec cmc-db mariadb-dump -u root -psecureRootPassword cmc | gzip > backup_before_fix.sql.gz
|
||||
--
|
||||
-- To run: docker exec -i cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/fix_corrupted_data.sql
|
||||
-- ============================================================================
|
||||
|
||||
-- Create a backup table for principles (optional safety measure)
|
||||
-- DROP TABLE IF EXISTS principles_backup;
|
||||
-- CREATE TABLE principles_backup AS SELECT * FROM principles;
|
||||
|
||||
SET NAMES utf8mb4;
|
||||
|
||||
-- ============================================================================
|
||||
-- PART 1: Global Pattern Fixes Using REPLACE
|
||||
-- ============================================================================
|
||||
-- These fix common mojibake patterns across all affected tables
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- Fix CUSTOMERS table
|
||||
-- ----------------------------------------------------------------------------
|
||||
UPDATE customers SET name = REPLACE(name, '’', ''') WHERE name LIKE '%’%'; -- Smart apostrophe
|
||||
UPDATE customers SET name = REPLACE(name, '“', '"') WHERE name LIKE '%“%'; -- Left double quote
|
||||
UPDATE customers SET name = REPLACE(name, 'â€', '"') WHERE name LIKE '%â€%'; -- Right double quote
|
||||
UPDATE customers SET name = REPLACE(name, 'â€"', '–') WHERE name LIKE '%â€"%'; -- En dash
|
||||
UPDATE customers SET name = REPLACE(name, 'â€"', '—') WHERE name LIKE '%â€"%'; -- Em dash
|
||||
UPDATE customers SET name = REPLACE(name, '​', '') WHERE name LIKE '%​%'; -- Zero-width space (remove)
|
||||
UPDATE customers SET name = REPLACE(name, ' ', ' ') WHERE name LIKE '% %'; -- Special space
|
||||
UPDATE customers SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%'; -- é
|
||||
UPDATE customers SET name = REPLACE(name, 'É', 'É') WHERE name LIKE '%É%'; -- É
|
||||
UPDATE customers SET name = REPLACE(name, 'ó', 'ó') WHERE name LIKE '%ó%'; -- ó
|
||||
UPDATE customers SET name = REPLACE(name, 'Ã', 'í') WHERE name LIKE '%Ã%'; -- í
|
||||
UPDATE customers SET name = REPLACE(name, 'ç', 'ç') WHERE name LIKE '%ç%'; -- ç
|
||||
UPDATE customers SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%'; -- ü
|
||||
UPDATE customers SET name = REPLACE(name, 'á', 'á') WHERE name LIKE '%á%'; -- á
|
||||
UPDATE customers SET name = REPLACE(name, 'ñ', 'ñ') WHERE name LIKE '%ñ%'; -- ñ
|
||||
UPDATE customers SET name = REPLACE(name, 'ö', 'ö') WHERE name LIKE '%ö%'; -- ö
|
||||
UPDATE customers SET name = REPLACE(name, 'ô', 'ô') WHERE name LIKE '%ô%'; -- ô
|
||||
UPDATE customers SET name = REPLACE(name, 'İ', 'İ') WHERE name LIKE '%İ%'; -- Turkish İ
|
||||
UPDATE customers SET name = REPLACE(name, 'Å', 'Ş') WHERE name LIKE '%Å%'; -- Turkish Ş
|
||||
UPDATE customers SET name = REPLACE(name, 'Ä', 'Ğ') WHERE name LIKE '%Ä%'; -- Turkish Ğ
|
||||
UPDATE customers SET name = REPLACE(name, 'Ä', 'Č') WHERE name LIKE '%Ä%'; -- Czech Č
|
||||
UPDATE customers SET name = REPLACE(name, 'Ä›', 'ě') WHERE name LIKE '%Ä›%'; -- Czech ě
|
||||
|
||||
-- Remove trailing tabs
|
||||
UPDATE customers SET name = TRIM(TRAILING '\t' FROM name) WHERE name LIKE '%\t';
|
||||
UPDATE customers SET trading_name = TRIM(TRAILING '\t' FROM trading_name) WHERE trading_name LIKE '%\t';
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- Fix PRINCIPLES table
|
||||
-- ----------------------------------------------------------------------------
|
||||
UPDATE principles SET name = REPLACE(name, 'ü', 'ü') WHERE name LIKE '%ü%';
|
||||
UPDATE principles SET name = REPLACE(name, 'é', 'é') WHERE name LIKE '%é%';
|
||||
UPDATE principles SET name = REPLACE(name, 'ó', 'ó') WHERE name LIKE '%ó%';
|
||||
|
||||
UPDATE principles SET address = REPLACE(address, 'ß', 'ß') WHERE address LIKE '%ß%'; -- German ß
|
||||
UPDATE principles SET address = REPLACE(address, 'ü', 'ü') WHERE address LIKE '%ü%';
|
||||
UPDATE principles SET address = REPLACE(address, 'é', 'é') WHERE address LIKE '%é%';
|
||||
UPDATE principles SET address = REPLACE(address, 'ñ', 'ñ') WHERE address LIKE '%ñ%';
|
||||
UPDATE principles SET address = REPLACE(address, 'Ä', 'č') WHERE address LIKE '%Ä%'; -- Czech č
|
||||
UPDATE principles SET address = REPLACE(address, 'ž', 'ž') WHERE address LIKE '%ž%'; -- Czech ž
|
||||
UPDATE principles SET address = REPLACE(address, 'Å¡', 'š') WHERE address LIKE '%Å¡%'; -- Czech š
|
||||
UPDATE principles SET address = REPLACE(address, 'Ä›', 'ě') WHERE address LIKE '%Ä›%'; -- Czech ě
|
||||
UPDATE principles SET address = REPLACE(address, 'Å', 'ň') WHERE address LIKE '%Å%'; -- Czech ň
|
||||
UPDATE principles SET address = REPLACE(address, 'Å™', 'ř') WHERE address LIKE '%Å™%'; -- Czech ř
|
||||
|
||||
UPDATE principles SET city = REPLACE(city, 'ü', 'ü') WHERE city LIKE '%ü%';
|
||||
UPDATE principles SET city = REPLACE(city, 'é', 'é') WHERE city LIKE '%é%';
|
||||
UPDATE principles SET city = REPLACE(city, 'ó', 'ó') WHERE city LIKE '%ó%';
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- Fix ADDRESSES table
|
||||
-- ----------------------------------------------------------------------------
|
||||
UPDATE addresses SET address = REPLACE(address, 'â€"', '–') WHERE address LIKE '%â€"%'; -- En dash
|
||||
UPDATE addresses SET address = REPLACE(address, 'â€"', '—') WHERE address LIKE '%â€"%'; -- Em dash
|
||||
UPDATE addresses SET address = REPLACE(address, 'é', 'é') WHERE address LIKE '%é%';
|
||||
UPDATE addresses SET address = REPLACE(address, 'ü', 'ü') WHERE address LIKE '%ü%';
|
||||
|
||||
UPDATE addresses SET city = REPLACE(city, 'é', 'é') WHERE city LIKE '%é%';
|
||||
UPDATE addresses SET city = REPLACE(city, 'ü', 'ü') WHERE city LIKE '%ü%';
|
||||
|
||||
-- ============================================================================
|
||||
-- PART 2: Specific Record Fixes
|
||||
-- ============================================================================
|
||||
-- These fix specific known corrupted records where pattern matching isn't enough
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- PRINCIPLES - Specific fixes
|
||||
-- ----------------------------------------------------------------------------
|
||||
|
||||
-- ID 2: German ß
|
||||
UPDATE principles
|
||||
SET address = 'Heinz-Fangman-Straße 18'
|
||||
WHERE id = 2;
|
||||
|
||||
-- ID 9: Czech characters (this is complex, needs full correction)
|
||||
UPDATE principles
|
||||
SET address = 'Bezručova 2901\n756 61 Rožnov pod Radhoštěm',
|
||||
city = 'Rožnov pod Radhoštěm'
|
||||
WHERE id = 9;
|
||||
|
||||
-- ID 13: German ü
|
||||
UPDATE principles
|
||||
SET name = 'IEP Technologies GmbH - BRILEX Gesellschaft für Explosionsschutz mbH'
|
||||
WHERE id = 13;
|
||||
|
||||
-- ID 14: German ß
|
||||
UPDATE principles
|
||||
SET address = 'Liebigstraße 2'
|
||||
WHERE id = 14;
|
||||
|
||||
-- ID 58: Spanish ó
|
||||
UPDATE principles
|
||||
SET name = 'Nosia S.r.l.- Señalización Industrial'
|
||||
WHERE id = 58;
|
||||
|
||||
-- ID 65: German ß
|
||||
UPDATE principles
|
||||
SET address = 'Alte Emser Straße 32'
|
||||
WHERE id = 65;
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- CUSTOMERS - Specific fixes
|
||||
-- ----------------------------------------------------------------------------
|
||||
|
||||
-- ID 253: É
|
||||
UPDATE customers
|
||||
SET name = 'SOLUZÉ CIVIL ENGINEERS Trading Under SOLUZE PTY LTD'
|
||||
WHERE id = 253;
|
||||
|
||||
-- ID 1006: Smart apostrophe
|
||||
UPDATE customers
|
||||
SET name = 'Dr. Prem''s Molecules Private Limited (DPMolecules)'
|
||||
WHERE id = 1006;
|
||||
|
||||
-- ID 1387: Smart quotes
|
||||
UPDATE customers
|
||||
SET name = 'DEE ENTERPRISES (QLD) PTY LTD trading as "MEKLEK"'
|
||||
WHERE id = 1387;
|
||||
|
||||
-- ID 1608: Smart apostrophe
|
||||
UPDATE customers
|
||||
SET name = 'Guidera O''Connor Pty Ltd'
|
||||
WHERE id = 1608;
|
||||
|
||||
-- ID 2174: Zero-width space
|
||||
UPDATE customers
|
||||
SET name = 'Ingredion ANZ Pty Ltd'
|
||||
WHERE id = 2174;
|
||||
|
||||
-- ID 2215: French é
|
||||
UPDATE customers
|
||||
SET name = 'Vale Nouvelle-Calédonie S.A.S'
|
||||
WHERE id = 2215;
|
||||
|
||||
-- ID 2375: Spanish ó
|
||||
UPDATE customers
|
||||
SET name = 'Evaluación y Control Ambiental S.A.S.'
|
||||
WHERE id = 2375;
|
||||
|
||||
-- ID 3143: Portuguese ç and ó
|
||||
UPDATE customers
|
||||
SET name = 'Zontahevy Comércio e Serviços Offshore Ltda'
|
||||
WHERE id = 3143;
|
||||
|
||||
-- ID 3529: French é
|
||||
UPDATE customers
|
||||
SET name = 'Société des Mines de Syama'
|
||||
WHERE id = 3529;
|
||||
|
||||
-- ID 3633: Special space (em space)
|
||||
UPDATE customers
|
||||
SET name = 'P.J. Berriman & Co PTY LTD'
|
||||
WHERE id = 3633;
|
||||
|
||||
-- ID 4325: Danish ø
|
||||
UPDATE customers
|
||||
SET name = 'Rambøll Danmark'
|
||||
WHERE id = 4325;
|
||||
|
||||
-- ID 4350: Turkish characters İ, Ş, Ğ
|
||||
UPDATE customers
|
||||
SET name = 'SONNIVA ENERGY MAKİNA İNŞAAT İTH. İHR. LTD. ŞTİ.'
|
||||
WHERE id = 4350;
|
||||
|
||||
-- ID 4669: French é
|
||||
UPDATE customers
|
||||
SET name = 'F.C.C. Fluides Conseils Calédonie SARL'
|
||||
WHERE id = 4669;
|
||||
|
||||
-- ID 4743: Czech ě
|
||||
UPDATE customers
|
||||
SET name = 'DGPack Prostějov'
|
||||
WHERE id = 4743;
|
||||
|
||||
-- ID 4764: Zero-width space
|
||||
UPDATE customers
|
||||
SET name = 'Mccready Welding Pty Ltd Trading under the entity Mccready''s Welding Services'
|
||||
WHERE id = 4764;
|
||||
|
||||
-- ID 4893: Spanish í
|
||||
UPDATE customers
|
||||
SET name = 'Oxiquímica, S.A.P.I. de C.V.'
|
||||
WHERE id = 4893;
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- ADDRESSES - Specific fixes
|
||||
-- ----------------------------------------------------------------------------
|
||||
|
||||
-- ID 19: En dash (7–11)
|
||||
UPDATE addresses
|
||||
SET address = 'Lvl 3, Building B, 7–11 Talavera Road'
|
||||
WHERE id = 19;
|
||||
|
||||
-- ============================================================================
|
||||
-- PART 3: Verification Queries
|
||||
-- ============================================================================
|
||||
-- Run these after the fixes to verify they worked
|
||||
|
||||
-- Check fixed principles
|
||||
SELECT id, name, city FROM principles WHERE id IN (2, 9, 13, 14, 58, 65);
|
||||
|
||||
-- Check fixed customers
|
||||
SELECT id, name FROM customers WHERE id IN (253, 1006, 1387, 1608, 2174, 2215, 2375, 3143, 3529, 4325, 4350, 4669, 4743, 4764, 4893);
|
||||
|
||||
-- Check fixed addresses
|
||||
SELECT id, address FROM addresses WHERE id = 19;
|
||||
|
||||
-- Count remaining corrupted records (should be much lower)
|
||||
SELECT COUNT(*) as remaining_corrupted_principles
|
||||
FROM principles
|
||||
WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
|
||||
|
||||
SELECT COUNT(*) as remaining_corrupted_customers
|
||||
FROM customers
|
||||
WHERE name REGEXP '[^ -~]';
|
||||
|
||||
SELECT COUNT(*) as remaining_corrupted_addresses
|
||||
FROM addresses
|
||||
WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]';
|
||||
|
||||
-- ============================================================================
|
||||
-- COMPLETION
|
||||
-- ============================================================================
|
||||
SELECT 'Fix script completed! Review the verification queries above.' as status;
|
||||
15
scripts/report_remaining_corruption.sh
Executable file
15
scripts/report_remaining_corruption.sh
Executable file
|
|
@ -0,0 +1,15 @@
|
|||
#!/bin/bash
|
||||
# ============================================================================
|
||||
# Generate Report of Remaining Corrupted Data
|
||||
# ============================================================================
|
||||
|
||||
DB_USER="root"
|
||||
DB_PASS="secureRootPassword"
|
||||
DB_NAME="cmc"
|
||||
CONTAINER="cmc-db"
|
||||
|
||||
docker exec $CONTAINER mariadb \
|
||||
-u $DB_USER \
|
||||
-p$DB_PASS \
|
||||
--default-character-set=utf8mb4 \
|
||||
$DB_NAME < scripts/report_remaining_corruption.sql
|
||||
191
scripts/report_remaining_corruption.sql
Normal file
191
scripts/report_remaining_corruption.sql
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
-- ============================================================================
|
||||
-- Report Remaining Corrupted Data After Fix
|
||||
-- ============================================================================
|
||||
-- Run this after fix_corrupted_data.sql to find records that still need manual fixes
|
||||
--
|
||||
-- To run: docker exec cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc < scripts/report_remaining_corruption.sql
|
||||
-- ============================================================================
|
||||
|
||||
SET NAMES utf8mb4;
|
||||
|
||||
SELECT '============================================================' as '';
|
||||
SELECT 'CORRUPTED DATA REPORT' as '';
|
||||
SELECT '============================================================' as '';
|
||||
|
||||
-- Summary counts
|
||||
SELECT '' as '';
|
||||
SELECT 'SUMMARY OF REMAINING CORRUPTION' as '';
|
||||
SELECT '-------------------------------' as '';
|
||||
|
||||
SELECT
|
||||
'principles' as table_name,
|
||||
COUNT(*) as corrupted_records,
|
||||
'name, address, city' as affected_columns
|
||||
FROM principles
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
OR address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'customers',
|
||||
COUNT(*),
|
||||
'name'
|
||||
FROM customers
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'addresses',
|
||||
COUNT(*),
|
||||
'address, city'
|
||||
FROM addresses
|
||||
WHERE address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'contacts',
|
||||
COUNT(*),
|
||||
'name'
|
||||
FROM contacts
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'products',
|
||||
COUNT(*),
|
||||
'name, description'
|
||||
FROM products
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
OR description REGEXP '[^ -~]';
|
||||
|
||||
-- Detailed listings
|
||||
SELECT '' as '';
|
||||
SELECT '============================================================' as '';
|
||||
SELECT 'DETAILED CORRUPTED RECORDS' as '';
|
||||
SELECT '============================================================' as '';
|
||||
|
||||
-- Principles with corruption
|
||||
SELECT '' as '';
|
||||
SELECT 'PRINCIPLES TABLE - Corrupted Records:' as '';
|
||||
SELECT '-------------------------------------' as '';
|
||||
SELECT
|
||||
id,
|
||||
name,
|
||||
LEFT(address, 50) as address_preview,
|
||||
city
|
||||
FROM principles
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
OR address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
ORDER BY id;
|
||||
|
||||
-- Customers with corruption
|
||||
SELECT '' as '';
|
||||
SELECT 'CUSTOMERS TABLE - Corrupted Records:' as '';
|
||||
SELECT '-------------------------------------' as '';
|
||||
SELECT
|
||||
id,
|
||||
name
|
||||
FROM customers
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
ORDER BY id;
|
||||
|
||||
-- Addresses with corruption
|
||||
SELECT '' as '';
|
||||
SELECT 'ADDRESSES TABLE - Corrupted Records:' as '';
|
||||
SELECT '-------------------------------------' as '';
|
||||
SELECT
|
||||
id,
|
||||
LEFT(address, 60) as address,
|
||||
city
|
||||
FROM addresses
|
||||
WHERE address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
ORDER BY id
|
||||
LIMIT 50;
|
||||
|
||||
-- Contacts with corruption
|
||||
SELECT '' as '';
|
||||
SELECT 'CONTACTS TABLE - Corrupted Records:' as '';
|
||||
SELECT '-------------------------------------' as '';
|
||||
SELECT
|
||||
id,
|
||||
name
|
||||
FROM contacts
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
ORDER BY id
|
||||
LIMIT 50;
|
||||
|
||||
-- Products with corruption
|
||||
SELECT '' as '';
|
||||
SELECT 'PRODUCTS TABLE - Corrupted Records:' as '';
|
||||
SELECT '-------------------------------------' as '';
|
||||
SELECT
|
||||
id,
|
||||
name,
|
||||
LEFT(description, 50) as description_preview
|
||||
FROM products
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
OR description REGEXP '[^ -~]'
|
||||
ORDER BY id
|
||||
LIMIT 50;
|
||||
|
||||
-- Common corruption patterns found
|
||||
SELECT '' as '';
|
||||
SELECT '============================================================' as '';
|
||||
SELECT 'COMMON CORRUPTION PATTERNS DETECTED' as '';
|
||||
SELECT '============================================================' as '';
|
||||
|
||||
SELECT
|
||||
'’' as mojibake_pattern,
|
||||
'Smart apostrophe (')' as should_be,
|
||||
COUNT(*) as occurrences_in_customers
|
||||
FROM customers
|
||||
WHERE name LIKE '%’%'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'“ or â€',
|
||||
'Smart quotes (" ")',
|
||||
COUNT(*)
|
||||
FROM customers
|
||||
WHERE name LIKE '%“%' OR name LIKE '%â€%'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'é, ü, ó, etc',
|
||||
'Accented chars (é, ü, ó)',
|
||||
COUNT(*)
|
||||
FROM customers
|
||||
WHERE name REGEXP 'Ã[©¼³±§]'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'â€" or â€"',
|
||||
'Dashes (– —)',
|
||||
COUNT(*)
|
||||
FROM addresses
|
||||
WHERE address LIKE '%â€"%' OR address LIKE '%â€"%'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'Ä, ž, Å¡, etc',
|
||||
'Czech/Polish chars',
|
||||
COUNT(*)
|
||||
FROM principles
|
||||
WHERE address REGEXP '[ÄÅ][¾¡›™]';
|
||||
|
||||
SELECT '' as '';
|
||||
SELECT '============================================================' as '';
|
||||
SELECT 'END OF REPORT' as '';
|
||||
SELECT '============================================================' as '';
|
||||
171
scripts/run_corruption_fix.sh
Executable file
171
scripts/run_corruption_fix.sh
Executable file
|
|
@ -0,0 +1,171 @@
|
|||
#!/bin/bash
|
||||
# ============================================================================
|
||||
# Safe Corruption Fix Execution Script
|
||||
# ============================================================================
|
||||
# This script:
|
||||
# 1. Creates a backup
|
||||
# 2. Shows current corruption status
|
||||
# 3. Prompts for confirmation
|
||||
# 4. Applies fixes
|
||||
# 5. Shows before/after comparison
|
||||
# ============================================================================
|
||||
|
||||
set -e # Exit on error
|
||||
|
||||
DB_USER="root"
|
||||
DB_PASS="secureRootPassword"
|
||||
DB_NAME="cmc"
|
||||
CONTAINER="cmc-db"
|
||||
BACKUP_DIR="./backups"
|
||||
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
echo -e "${BLUE}============================================================${NC}"
|
||||
echo -e "${BLUE}CMC Database Corruption Fix Script${NC}"
|
||||
echo -e "${BLUE}============================================================${NC}"
|
||||
echo ""
|
||||
|
||||
# Create backups directory
|
||||
mkdir -p "$BACKUP_DIR"
|
||||
|
||||
# Step 1: Create backup
|
||||
echo -e "${YELLOW}Step 1: Creating database backup...${NC}"
|
||||
docker exec $CONTAINER mariadb-dump \
|
||||
-u $DB_USER \
|
||||
-p$DB_PASS \
|
||||
--default-character-set=utf8mb4 \
|
||||
--single-transaction \
|
||||
$DB_NAME | gzip > "$BACKUP_DIR/backup_before_corruption_fix_$TIMESTAMP.sql.gz"
|
||||
|
||||
BACKUP_SIZE=$(du -h "$BACKUP_DIR/backup_before_corruption_fix_$TIMESTAMP.sql.gz" | cut -f1)
|
||||
echo -e "${GREEN}✓ Backup created: backup_before_corruption_fix_$TIMESTAMP.sql.gz (${BACKUP_SIZE})${NC}"
|
||||
echo ""
|
||||
|
||||
# Step 2: Show current corruption status
|
||||
echo -e "${YELLOW}Step 2: Current corruption status${NC}"
|
||||
echo "-----------------------------------"
|
||||
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS --default-character-set=utf8mb4 $DB_NAME -e "
|
||||
SELECT
|
||||
'principles' as table_name,
|
||||
COUNT(*) as corrupted_records
|
||||
FROM principles
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
OR address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
UNION ALL
|
||||
SELECT 'customers', COUNT(*)
|
||||
FROM customers
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
UNION ALL
|
||||
SELECT 'addresses', COUNT(*)
|
||||
FROM addresses
|
||||
WHERE address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]';
|
||||
"
|
||||
echo ""
|
||||
|
||||
# Step 3: Show sample corrupted data
|
||||
echo -e "${YELLOW}Step 3: Sample corrupted data (BEFORE fix)${NC}"
|
||||
echo "-------------------------------------------"
|
||||
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS --default-character-set=utf8mb4 $DB_NAME -e "
|
||||
SELECT 'CUSTOMERS' as table_name, id, name FROM customers WHERE id IN (253, 1006, 1387, 1608)
|
||||
UNION ALL
|
||||
SELECT 'PRINCIPLES', id, name FROM principles WHERE id IN (2, 9, 13, 14);
|
||||
"
|
||||
echo ""
|
||||
|
||||
# Step 4: Prompt for confirmation
|
||||
echo -e "${RED}IMPORTANT:${NC}"
|
||||
echo "- Backup created at: $BACKUP_DIR/backup_before_corruption_fix_$TIMESTAMP.sql.gz"
|
||||
echo "- This will modify data in tables: principles, customers, addresses"
|
||||
echo "- Changes fix mojibake like: ’ → ', é → é, etc."
|
||||
echo ""
|
||||
read -p "Do you want to proceed with the fix? (yes/no): " CONFIRM
|
||||
|
||||
if [ "$CONFIRM" != "yes" ]; then
|
||||
echo -e "${RED}Fix cancelled by user${NC}"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Step 5: Apply fixes
|
||||
echo ""
|
||||
echo -e "${YELLOW}Step 5: Applying corruption fixes...${NC}"
|
||||
docker exec -i $CONTAINER mariadb \
|
||||
-u $DB_USER \
|
||||
-p$DB_PASS \
|
||||
--default-character-set=utf8mb4 \
|
||||
$DB_NAME < scripts/fix_corrupted_data.sql
|
||||
|
||||
echo -e "${GREEN}✓ Fix script executed${NC}"
|
||||
echo ""
|
||||
|
||||
# Step 6: Show results
|
||||
echo -e "${YELLOW}Step 6: Results (AFTER fix)${NC}"
|
||||
echo "----------------------------"
|
||||
echo ""
|
||||
echo "Sample fixed data:"
|
||||
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS --default-character-set=utf8mb4 $DB_NAME -e "
|
||||
SELECT 'CUSTOMERS' as table_name, id, name FROM customers WHERE id IN (253, 1006, 1387, 1608)
|
||||
UNION ALL
|
||||
SELECT 'PRINCIPLES', id, name FROM principles WHERE id IN (2, 9, 13, 14);
|
||||
"
|
||||
echo ""
|
||||
|
||||
echo "Remaining corruption counts:"
|
||||
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS --default-character-set=utf8mb4 $DB_NAME -e "
|
||||
SELECT
|
||||
'principles' as table_name,
|
||||
COUNT(*) as corrupted_records
|
||||
FROM principles
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
OR address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
UNION ALL
|
||||
SELECT 'customers', COUNT(*)
|
||||
FROM customers
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
UNION ALL
|
||||
SELECT 'addresses', COUNT(*)
|
||||
FROM addresses
|
||||
WHERE address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]';
|
||||
"
|
||||
echo ""
|
||||
|
||||
# Step 7: Test new insert
|
||||
echo -e "${YELLOW}Step 7: Testing new inserts with special characters...${NC}"
|
||||
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS --default-character-set=utf8mb4 $DB_NAME -e "
|
||||
START TRANSACTION;
|
||||
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
|
||||
VALUES ('Test Müller & Söhne GmbH', 999999, 'Straße 123', 'München', 'Bayern', '80331', 1, 1, 'TEST', 'http://test.com');
|
||||
SELECT name, address, city FROM principles WHERE code = 999999;
|
||||
ROLLBACK;
|
||||
"
|
||||
echo -e "${GREEN}✓ New inserts working correctly${NC}"
|
||||
echo ""
|
||||
|
||||
# Final summary
|
||||
echo -e "${BLUE}============================================================${NC}"
|
||||
echo -e "${GREEN}FIX COMPLETED SUCCESSFULLY!${NC}"
|
||||
echo -e "${BLUE}============================================================${NC}"
|
||||
echo ""
|
||||
echo "Summary:"
|
||||
echo "- Backup location: $BACKUP_DIR/backup_before_corruption_fix_$TIMESTAMP.sql.gz"
|
||||
echo "- Fixed common patterns: smart quotes, accented characters, dashes"
|
||||
echo "- Fixed specific records: ~30+ known corrupted records"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo "1. Review the fixed data in the application"
|
||||
echo "2. Run: bash scripts/report_remaining_corruption.sh to see what's left"
|
||||
echo "3. Manually fix any remaining corrupted records"
|
||||
echo ""
|
||||
echo "To rollback if needed:"
|
||||
echo " gunzip < $BACKUP_DIR/backup_before_corruption_fix_$TIMESTAMP.sql.gz | \\"
|
||||
echo " docker exec -i $CONTAINER mariadb -u $DB_USER -p$DB_PASS $DB_NAME"
|
||||
echo ""
|
||||
163
scripts/verify_charset_fix.sh
Executable file
163
scripts/verify_charset_fix.sh
Executable file
|
|
@ -0,0 +1,163 @@
|
|||
#!/bin/bash
|
||||
# Script to verify character encoding fix in MariaDB
|
||||
|
||||
DB_USER="root"
|
||||
DB_PASS="secureRootPassword"
|
||||
DB_NAME="cmc"
|
||||
CONTAINER="cmc-db"
|
||||
|
||||
echo "=========================================="
|
||||
echo "Character Encoding Verification Script"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Function to run MariaDB command
|
||||
run_sql() {
|
||||
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS $DB_NAME -e "$1"
|
||||
}
|
||||
|
||||
# Function to run MariaDB command with UTF-8 client
|
||||
run_sql_utf8() {
|
||||
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS --default-character-set=utf8mb4 $DB_NAME -e "$1"
|
||||
}
|
||||
|
||||
echo "1. Check database default charset"
|
||||
echo "-----------------------------------"
|
||||
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS -e "
|
||||
SELECT @@character_set_database, @@collation_database;
|
||||
"
|
||||
echo ""
|
||||
|
||||
echo "2. Count tables by charset (BEFORE should show ~59 latin1)"
|
||||
echo "------------------------------------------------------------"
|
||||
run_sql "
|
||||
SELECT
|
||||
CASE
|
||||
WHEN TABLE_COLLATION LIKE 'latin1%' THEN 'latin1'
|
||||
WHEN TABLE_COLLATION LIKE 'utf8mb4%' THEN 'utf8mb4'
|
||||
WHEN TABLE_COLLATION LIKE 'utf8mb3%' THEN 'utf8mb3'
|
||||
ELSE TABLE_COLLATION
|
||||
END as charset,
|
||||
COUNT(*) as table_count
|
||||
FROM information_schema.TABLES
|
||||
WHERE TABLE_SCHEMA='$DB_NAME'
|
||||
GROUP BY charset
|
||||
ORDER BY table_count DESC;
|
||||
"
|
||||
echo ""
|
||||
|
||||
echo "3. Check specific table charsets (sample)"
|
||||
echo "------------------------------------------"
|
||||
run_sql "
|
||||
SELECT TABLE_NAME, TABLE_COLLATION
|
||||
FROM information_schema.TABLES
|
||||
WHERE TABLE_SCHEMA='$DB_NAME'
|
||||
AND TABLE_NAME IN ('principles', 'customers', 'addresses', 'contacts', 'products')
|
||||
ORDER BY TABLE_NAME;
|
||||
"
|
||||
echo ""
|
||||
|
||||
echo "4. Show column charsets for principles table"
|
||||
echo "---------------------------------------------"
|
||||
run_sql "
|
||||
SELECT COLUMN_NAME, CHARACTER_SET_NAME, COLLATION_NAME
|
||||
FROM information_schema.COLUMNS
|
||||
WHERE TABLE_SCHEMA='$DB_NAME'
|
||||
AND TABLE_NAME='principles'
|
||||
AND DATA_TYPE IN ('varchar', 'text', 'char')
|
||||
ORDER BY ORDINAL_POSITION;
|
||||
"
|
||||
echo ""
|
||||
|
||||
echo "5. Test INSERT of special characters (will be rolled back)"
|
||||
echo "-----------------------------------------------------------"
|
||||
echo "Attempting to insert test record with special characters..."
|
||||
run_sql_utf8 "
|
||||
START TRANSACTION;
|
||||
|
||||
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
|
||||
VALUES (
|
||||
'Test Müller & Söhne GmbH', -- German umlauts
|
||||
999999,
|
||||
'Straße 123, 2. Stock', -- German ß and ² symbol
|
||||
'München', -- German ü
|
||||
'Bayern',
|
||||
'80331',
|
||||
1,
|
||||
1,
|
||||
'TEST',
|
||||
'http://test.com'
|
||||
);
|
||||
|
||||
-- Read it back immediately
|
||||
SELECT id, name, address, city
|
||||
FROM principles
|
||||
WHERE code = 999999;
|
||||
|
||||
ROLLBACK;
|
||||
" 2>&1 | grep -A 10 "id\|Test Müller\|ERROR"
|
||||
echo "✓ Test insert rolled back"
|
||||
echo ""
|
||||
|
||||
echo "6. Verify current corrupted data in principles"
|
||||
echo "-----------------------------------------------"
|
||||
echo "These should show corruption if NOT yet fixed:"
|
||||
run_sql_utf8 "
|
||||
SELECT id, name, city
|
||||
FROM principles
|
||||
WHERE id IN (9, 18);
|
||||
"
|
||||
echo ""
|
||||
|
||||
echo "7. Verify corrupted data in customers"
|
||||
echo "--------------------------------------"
|
||||
run_sql_utf8 "
|
||||
SELECT id, name
|
||||
FROM customers
|
||||
WHERE id IN (253, 1006, 1387, 1608);
|
||||
"
|
||||
echo ""
|
||||
|
||||
echo "8. Count potentially corrupted records across tables"
|
||||
echo "-----------------------------------------------------"
|
||||
run_sql "
|
||||
SELECT
|
||||
'principles' as table_name,
|
||||
COUNT(*) as rows_with_non_ascii
|
||||
FROM principles
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
OR address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
UNION ALL
|
||||
SELECT
|
||||
'customers',
|
||||
COUNT(*)
|
||||
FROM customers
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
UNION ALL
|
||||
SELECT
|
||||
'addresses',
|
||||
COUNT(*)
|
||||
FROM addresses
|
||||
WHERE address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
UNION ALL
|
||||
SELECT
|
||||
'contacts',
|
||||
COUNT(*)
|
||||
FROM contacts
|
||||
WHERE firstname REGEXP '[^ -~]'
|
||||
OR lastname REGEXP '[^ -~]';
|
||||
"
|
||||
echo ""
|
||||
|
||||
echo "=========================================="
|
||||
echo "Verification complete!"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo "INTERPRETATION:"
|
||||
echo "- If table_count for 'latin1' is ~59: NOT YET FIXED"
|
||||
echo "- If table_count for 'utf8mb4' is ~62: FIXED!"
|
||||
echo "- Test insert should show proper characters if fixed"
|
||||
echo "- Corrupted data will still show mojibake until manually repaired"
|
||||
echo ""
|
||||
207
scripts/verify_charset_manual.sql
Normal file
207
scripts/verify_charset_manual.sql
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
-- ============================================================================
|
||||
-- Manual MariaDB Character Encoding Verification Commands
|
||||
-- ============================================================================
|
||||
-- Run these commands directly in MariaDB to verify the charset fix
|
||||
--
|
||||
-- Connect to database:
|
||||
-- docker exec -it cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc
|
||||
-- ============================================================================
|
||||
|
||||
-- 1. CHECK DATABASE-LEVEL SETTINGS
|
||||
-- Should show: utf8mb4
|
||||
SELECT @@character_set_database as db_charset,
|
||||
@@collation_database as db_collation;
|
||||
|
||||
-- 2. COUNT TABLES BY CHARACTER SET
|
||||
-- BEFORE FIX: ~59 latin1 tables
|
||||
-- AFTER FIX: ~62 utf8mb4 tables
|
||||
SELECT
|
||||
CASE
|
||||
WHEN TABLE_COLLATION LIKE 'latin1%' THEN 'latin1'
|
||||
WHEN TABLE_COLLATION LIKE 'utf8mb4%' THEN 'utf8mb4'
|
||||
WHEN TABLE_COLLATION LIKE 'utf8mb3%' THEN 'utf8mb3'
|
||||
ELSE TABLE_COLLATION
|
||||
END as charset,
|
||||
COUNT(*) as table_count
|
||||
FROM information_schema.TABLES
|
||||
WHERE TABLE_SCHEMA='cmc'
|
||||
GROUP BY charset
|
||||
ORDER BY table_count DESC;
|
||||
|
||||
-- 3. CHECK SPECIFIC TABLES (Key business tables)
|
||||
SELECT TABLE_NAME, TABLE_COLLATION, ENGINE
|
||||
FROM information_schema.TABLES
|
||||
WHERE TABLE_SCHEMA='cmc'
|
||||
AND TABLE_NAME IN (
|
||||
'principles', 'customers', 'addresses', 'contacts',
|
||||
'products', 'quotes', 'purchase_orders', 'invoices'
|
||||
)
|
||||
ORDER BY TABLE_NAME;
|
||||
|
||||
-- 4. CHECK ALL COLUMN CHARSETS FOR 'principles' TABLE
|
||||
-- All text columns should show utf8mb4 after fix
|
||||
SELECT
|
||||
COLUMN_NAME,
|
||||
DATA_TYPE,
|
||||
CHARACTER_SET_NAME,
|
||||
COLLATION_NAME
|
||||
FROM information_schema.COLUMNS
|
||||
WHERE TABLE_SCHEMA='cmc'
|
||||
AND TABLE_NAME='principles'
|
||||
AND DATA_TYPE IN ('varchar', 'text', 'char')
|
||||
ORDER BY ORDINAL_POSITION;
|
||||
|
||||
-- 5. TEST INSERT WITH SPECIAL CHARACTERS
|
||||
-- This will be rolled back - just testing if charset accepts them
|
||||
START TRANSACTION;
|
||||
|
||||
-- Insert test record with various special characters
|
||||
INSERT INTO principles (
|
||||
name, code, address, city, state, postcode,
|
||||
country_id, currency_id, short_name, url
|
||||
)
|
||||
VALUES (
|
||||
'Test Müller & Söhne GmbH', -- German umlauts (ü, ö)
|
||||
999999,
|
||||
'Straße 123, 2. Stock', -- German ß
|
||||
'München', -- German ü
|
||||
'Bayern',
|
||||
'80331',
|
||||
1, 1, 'TEST', 'http://test.com'
|
||||
);
|
||||
|
||||
-- Read it back - should show properly if charset is correct
|
||||
SELECT id, name, address, city
|
||||
FROM principles
|
||||
WHERE code = 999999;
|
||||
|
||||
-- Expected output if FIXED:
|
||||
-- name: Test Müller & Söhne GmbH (proper umlauts)
|
||||
-- address: Straße 123, 2. Stock
|
||||
-- city: München
|
||||
|
||||
-- Expected output if NOT FIXED:
|
||||
-- Characters will be corrupted or insert will fail
|
||||
|
||||
ROLLBACK; -- Don't actually save the test record
|
||||
|
||||
-- 6. VIEW KNOWN CORRUPTED RECORDS
|
||||
-- These show what the corruption looks like before manual repair
|
||||
|
||||
-- Principle ID 9 - Czech characters corrupted
|
||||
SELECT id, name, address, city
|
||||
FROM principles
|
||||
WHERE id = 9;
|
||||
-- BEFORE FIX: BezruÄova 2901, Rožnov pod RadhoÅ¡tÄ›m
|
||||
-- AFTER FIX (still needs manual repair): Same corruption
|
||||
-- AFTER MANUAL REPAIR: Bezručova 2901, Rožnov pod Radhoštěm
|
||||
|
||||
-- Principle ID 18 - German umlaut in city
|
||||
SELECT id, short_name, city, country_id
|
||||
FROM principles
|
||||
WHERE id = 18;
|
||||
|
||||
-- Customer corrupted names
|
||||
SELECT id, name
|
||||
FROM customers
|
||||
WHERE id IN (253, 1006, 1387, 1608, 2174);
|
||||
-- Look for: ’ (should be '), É (should be É), “ (should be ")
|
||||
|
||||
-- Address corrupted data
|
||||
SELECT id, address, city
|
||||
FROM addresses
|
||||
WHERE id IN (19, 25);
|
||||
-- Look for: â€" (should be –)
|
||||
|
||||
-- 7. COUNT RECORDS WITH NON-ASCII CHARACTERS
|
||||
-- Shows how many records might need manual repair after charset fix
|
||||
SELECT
|
||||
'principles' as table_name,
|
||||
COUNT(*) as records_with_non_ascii
|
||||
FROM principles
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
OR address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT 'customers', COUNT(*)
|
||||
FROM customers
|
||||
WHERE name REGEXP '[^ -~]'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT 'addresses', COUNT(*)
|
||||
FROM addresses
|
||||
WHERE address REGEXP '[^ -~]'
|
||||
OR city REGEXP '[^ -~]'
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT 'contacts', COUNT(*)
|
||||
FROM contacts
|
||||
WHERE firstname REGEXP '[^ -~]'
|
||||
OR lastname REGEXP '[^ -~]';
|
||||
|
||||
-- 8. VERIFY CONNECTION CHARACTER SET
|
||||
-- Your client should also be using utf8mb4
|
||||
SHOW VARIABLES LIKE 'character_set_%';
|
||||
SHOW VARIABLES LIKE 'collation_%';
|
||||
|
||||
-- Should see:
|
||||
-- character_set_client: utf8mb4
|
||||
-- character_set_connection: utf8mb4
|
||||
-- character_set_results: utf8mb4
|
||||
-- character_set_database: utf8mb4
|
||||
|
||||
-- ============================================================================
|
||||
-- AFTER CONVERSION - TEST NEW INSERTS
|
||||
-- ============================================================================
|
||||
-- After running convert_charset_to_utf8mb4.sql, try these tests:
|
||||
|
||||
-- Test 1: Insert with German characters
|
||||
START TRANSACTION;
|
||||
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
|
||||
VALUES ('Müller GmbH', 999991, 'Hauptstraße 1', 'München', 'BY', '80331', 1, 1, 'MUL', 'https://test.de');
|
||||
SELECT * FROM principles WHERE code = 999991;
|
||||
-- Should show: Müller, Hauptstraße, München (correctly)
|
||||
ROLLBACK;
|
||||
|
||||
-- Test 2: Insert with French characters
|
||||
START TRANSACTION;
|
||||
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
|
||||
VALUES ('Société François', 999992, 'Rue de la Paix', 'Orléans', 'FR', '45000', 1, 1, 'SOC', 'https://test.fr');
|
||||
SELECT * FROM principles WHERE code = 999992;
|
||||
-- Should show: Société, François, Orléans (correctly)
|
||||
ROLLBACK;
|
||||
|
||||
-- Test 3: Insert with special punctuation
|
||||
START TRANSACTION;
|
||||
INSERT INTO customers (name, trading_name, abn, created, notes, discount_pricing_policies, payment_terms, customer_category_id, url, country_id)
|
||||
VALUES ('O'Connor & Sons — "Quality" Products', 'O'Connor Trading', '', NOW(), '', '', '', 1, '', 1);
|
||||
SELECT name FROM customers WHERE name LIKE '%O''Connor%' ORDER BY id DESC LIMIT 1;
|
||||
-- Should show: O'Connor & Sons — "Quality" Products (with proper apostrophes, em-dash, and quotes)
|
||||
ROLLBACK;
|
||||
|
||||
-- ============================================================================
|
||||
-- SUMMARY OF WHAT TO LOOK FOR
|
||||
-- ============================================================================
|
||||
|
||||
/*
|
||||
BEFORE FIX:
|
||||
- Table charset: latin1_swedish_ci
|
||||
- Column charset: latin1
|
||||
- Test insert: May fail or corrupt characters
|
||||
- Existing data: Shows mojibake (É, ’, etc.)
|
||||
|
||||
AFTER FIX (charset conversion only):
|
||||
- Table charset: utf8mb4_general_ci
|
||||
- Column charset: utf8mb4
|
||||
- Test insert: Succeeds with proper characters
|
||||
- Existing data: STILL shows mojibake (needs manual repair)
|
||||
|
||||
AFTER MANUAL REPAIR:
|
||||
- Everything displays correctly
|
||||
- New inserts work properly
|
||||
- Old data has been corrected
|
||||
*/
|
||||
Loading…
Reference in a new issue