208 lines
6.6 KiB
MySQL
208 lines
6.6 KiB
MySQL
|
|
-- ============================================================================
|
|||
|
|
-- Manual MariaDB Character Encoding Verification Commands
|
|||
|
|
-- ============================================================================
|
|||
|
|
-- Run these commands directly in MariaDB to verify the charset fix
|
|||
|
|
--
|
|||
|
|
-- Connect to database:
|
|||
|
|
-- docker exec -it cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc
|
|||
|
|
-- ============================================================================
|
|||
|
|
|
|||
|
|
-- 1. CHECK DATABASE-LEVEL SETTINGS
|
|||
|
|
-- Should show: utf8mb4
|
|||
|
|
SELECT @@character_set_database as db_charset,
|
|||
|
|
@@collation_database as db_collation;
|
|||
|
|
|
|||
|
|
-- 2. COUNT TABLES BY CHARACTER SET
|
|||
|
|
-- BEFORE FIX: ~59 latin1 tables
|
|||
|
|
-- AFTER FIX: ~62 utf8mb4 tables
|
|||
|
|
SELECT
|
|||
|
|
CASE
|
|||
|
|
WHEN TABLE_COLLATION LIKE 'latin1%' THEN 'latin1'
|
|||
|
|
WHEN TABLE_COLLATION LIKE 'utf8mb4%' THEN 'utf8mb4'
|
|||
|
|
WHEN TABLE_COLLATION LIKE 'utf8mb3%' THEN 'utf8mb3'
|
|||
|
|
ELSE TABLE_COLLATION
|
|||
|
|
END as charset,
|
|||
|
|
COUNT(*) as table_count
|
|||
|
|
FROM information_schema.TABLES
|
|||
|
|
WHERE TABLE_SCHEMA='cmc'
|
|||
|
|
GROUP BY charset
|
|||
|
|
ORDER BY table_count DESC;
|
|||
|
|
|
|||
|
|
-- 3. CHECK SPECIFIC TABLES (Key business tables)
|
|||
|
|
SELECT TABLE_NAME, TABLE_COLLATION, ENGINE
|
|||
|
|
FROM information_schema.TABLES
|
|||
|
|
WHERE TABLE_SCHEMA='cmc'
|
|||
|
|
AND TABLE_NAME IN (
|
|||
|
|
'principles', 'customers', 'addresses', 'contacts',
|
|||
|
|
'products', 'quotes', 'purchase_orders', 'invoices'
|
|||
|
|
)
|
|||
|
|
ORDER BY TABLE_NAME;
|
|||
|
|
|
|||
|
|
-- 4. CHECK ALL COLUMN CHARSETS FOR 'principles' TABLE
|
|||
|
|
-- All text columns should show utf8mb4 after fix
|
|||
|
|
SELECT
|
|||
|
|
COLUMN_NAME,
|
|||
|
|
DATA_TYPE,
|
|||
|
|
CHARACTER_SET_NAME,
|
|||
|
|
COLLATION_NAME
|
|||
|
|
FROM information_schema.COLUMNS
|
|||
|
|
WHERE TABLE_SCHEMA='cmc'
|
|||
|
|
AND TABLE_NAME='principles'
|
|||
|
|
AND DATA_TYPE IN ('varchar', 'text', 'char')
|
|||
|
|
ORDER BY ORDINAL_POSITION;
|
|||
|
|
|
|||
|
|
-- 5. TEST INSERT WITH SPECIAL CHARACTERS
|
|||
|
|
-- This will be rolled back - just testing if charset accepts them
|
|||
|
|
START TRANSACTION;
|
|||
|
|
|
|||
|
|
-- Insert test record with various special characters
|
|||
|
|
INSERT INTO principles (
|
|||
|
|
name, code, address, city, state, postcode,
|
|||
|
|
country_id, currency_id, short_name, url
|
|||
|
|
)
|
|||
|
|
VALUES (
|
|||
|
|
'Test Müller & Söhne GmbH', -- German umlauts (ü, ö)
|
|||
|
|
999999,
|
|||
|
|
'Straße 123, 2. Stock', -- German ß
|
|||
|
|
'München', -- German ü
|
|||
|
|
'Bayern',
|
|||
|
|
'80331',
|
|||
|
|
1, 1, 'TEST', 'http://test.com'
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
-- Read it back - should show properly if charset is correct
|
|||
|
|
SELECT id, name, address, city
|
|||
|
|
FROM principles
|
|||
|
|
WHERE code = 999999;
|
|||
|
|
|
|||
|
|
-- Expected output if FIXED:
|
|||
|
|
-- name: Test Müller & Söhne GmbH (proper umlauts)
|
|||
|
|
-- address: Straße 123, 2. Stock
|
|||
|
|
-- city: München
|
|||
|
|
|
|||
|
|
-- Expected output if NOT FIXED:
|
|||
|
|
-- Characters will be corrupted or insert will fail
|
|||
|
|
|
|||
|
|
ROLLBACK; -- Don't actually save the test record
|
|||
|
|
|
|||
|
|
-- 6. VIEW KNOWN CORRUPTED RECORDS
|
|||
|
|
-- These show what the corruption looks like before manual repair
|
|||
|
|
|
|||
|
|
-- Principle ID 9 - Czech characters corrupted
|
|||
|
|
SELECT id, name, address, city
|
|||
|
|
FROM principles
|
|||
|
|
WHERE id = 9;
|
|||
|
|
-- BEFORE FIX: BezruÄova 2901, Rožnov pod RadhoÅ¡tÄ›m
|
|||
|
|
-- AFTER FIX (still needs manual repair): Same corruption
|
|||
|
|
-- AFTER MANUAL REPAIR: Bezručova 2901, Rožnov pod Radhoštěm
|
|||
|
|
|
|||
|
|
-- Principle ID 18 - German umlaut in city
|
|||
|
|
SELECT id, short_name, city, country_id
|
|||
|
|
FROM principles
|
|||
|
|
WHERE id = 18;
|
|||
|
|
|
|||
|
|
-- Customer corrupted names
|
|||
|
|
SELECT id, name
|
|||
|
|
FROM customers
|
|||
|
|
WHERE id IN (253, 1006, 1387, 1608, 2174);
|
|||
|
|
-- Look for: ’ (should be '), É (should be É), “ (should be ")
|
|||
|
|
|
|||
|
|
-- Address corrupted data
|
|||
|
|
SELECT id, address, city
|
|||
|
|
FROM addresses
|
|||
|
|
WHERE id IN (19, 25);
|
|||
|
|
-- Look for: â€" (should be –)
|
|||
|
|
|
|||
|
|
-- 7. COUNT RECORDS WITH NON-ASCII CHARACTERS
|
|||
|
|
-- Shows how many records might need manual repair after charset fix
|
|||
|
|
SELECT
|
|||
|
|
'principles' as table_name,
|
|||
|
|
COUNT(*) as records_with_non_ascii
|
|||
|
|
FROM principles
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
OR address REGEXP '[^ -~]'
|
|||
|
|
OR city REGEXP '[^ -~]'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT 'customers', COUNT(*)
|
|||
|
|
FROM customers
|
|||
|
|
WHERE name REGEXP '[^ -~]'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT 'addresses', COUNT(*)
|
|||
|
|
FROM addresses
|
|||
|
|
WHERE address REGEXP '[^ -~]'
|
|||
|
|
OR city REGEXP '[^ -~]'
|
|||
|
|
|
|||
|
|
UNION ALL
|
|||
|
|
|
|||
|
|
SELECT 'contacts', COUNT(*)
|
|||
|
|
FROM contacts
|
|||
|
|
WHERE firstname REGEXP '[^ -~]'
|
|||
|
|
OR lastname REGEXP '[^ -~]';
|
|||
|
|
|
|||
|
|
-- 8. VERIFY CONNECTION CHARACTER SET
|
|||
|
|
-- Your client should also be using utf8mb4
|
|||
|
|
SHOW VARIABLES LIKE 'character_set_%';
|
|||
|
|
SHOW VARIABLES LIKE 'collation_%';
|
|||
|
|
|
|||
|
|
-- Should see:
|
|||
|
|
-- character_set_client: utf8mb4
|
|||
|
|
-- character_set_connection: utf8mb4
|
|||
|
|
-- character_set_results: utf8mb4
|
|||
|
|
-- character_set_database: utf8mb4
|
|||
|
|
|
|||
|
|
-- ============================================================================
|
|||
|
|
-- AFTER CONVERSION - TEST NEW INSERTS
|
|||
|
|
-- ============================================================================
|
|||
|
|
-- After running convert_charset_to_utf8mb4.sql, try these tests:
|
|||
|
|
|
|||
|
|
-- Test 1: Insert with German characters
|
|||
|
|
START TRANSACTION;
|
|||
|
|
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
|
|||
|
|
VALUES ('Müller GmbH', 999991, 'Hauptstraße 1', 'München', 'BY', '80331', 1, 1, 'MUL', 'https://test.de');
|
|||
|
|
SELECT * FROM principles WHERE code = 999991;
|
|||
|
|
-- Should show: Müller, Hauptstraße, München (correctly)
|
|||
|
|
ROLLBACK;
|
|||
|
|
|
|||
|
|
-- Test 2: Insert with French characters
|
|||
|
|
START TRANSACTION;
|
|||
|
|
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
|
|||
|
|
VALUES ('Société François', 999992, 'Rue de la Paix', 'Orléans', 'FR', '45000', 1, 1, 'SOC', 'https://test.fr');
|
|||
|
|
SELECT * FROM principles WHERE code = 999992;
|
|||
|
|
-- Should show: Société, François, Orléans (correctly)
|
|||
|
|
ROLLBACK;
|
|||
|
|
|
|||
|
|
-- Test 3: Insert with special punctuation
|
|||
|
|
START TRANSACTION;
|
|||
|
|
INSERT INTO customers (name, trading_name, abn, created, notes, discount_pricing_policies, payment_terms, customer_category_id, url, country_id)
|
|||
|
|
VALUES ('O'Connor & Sons — "Quality" Products', 'O'Connor Trading', '', NOW(), '', '', '', 1, '', 1);
|
|||
|
|
SELECT name FROM customers WHERE name LIKE '%O''Connor%' ORDER BY id DESC LIMIT 1;
|
|||
|
|
-- Should show: O'Connor & Sons — "Quality" Products (with proper apostrophes, em-dash, and quotes)
|
|||
|
|
ROLLBACK;
|
|||
|
|
|
|||
|
|
-- ============================================================================
|
|||
|
|
-- SUMMARY OF WHAT TO LOOK FOR
|
|||
|
|
-- ============================================================================
|
|||
|
|
|
|||
|
|
/*
|
|||
|
|
BEFORE FIX:
|
|||
|
|
- Table charset: latin1_swedish_ci
|
|||
|
|
- Column charset: latin1
|
|||
|
|
- Test insert: May fail or corrupt characters
|
|||
|
|
- Existing data: Shows mojibake (É, ’, etc.)
|
|||
|
|
|
|||
|
|
AFTER FIX (charset conversion only):
|
|||
|
|
- Table charset: utf8mb4_general_ci
|
|||
|
|
- Column charset: utf8mb4
|
|||
|
|
- Test insert: Succeeds with proper characters
|
|||
|
|
- Existing data: STILL shows mojibake (needs manual repair)
|
|||
|
|
|
|||
|
|
AFTER MANUAL REPAIR:
|
|||
|
|
- Everything displays correctly
|
|||
|
|
- New inserts work properly
|
|||
|
|
- Old data has been corrected
|
|||
|
|
*/
|