208 lines
6.6 KiB
SQL
208 lines
6.6 KiB
SQL
-- ============================================================================
|
||
-- Manual MariaDB Character Encoding Verification Commands
|
||
-- ============================================================================
|
||
-- Run these commands directly in MariaDB to verify the charset fix
|
||
--
|
||
-- Connect to database:
|
||
-- docker exec -it cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc
|
||
-- ============================================================================
|
||
|
||
-- 1. CHECK DATABASE-LEVEL SETTINGS
|
||
-- Should show: utf8mb4
|
||
SELECT @@character_set_database as db_charset,
|
||
@@collation_database as db_collation;
|
||
|
||
-- 2. COUNT TABLES BY CHARACTER SET
|
||
-- BEFORE FIX: ~59 latin1 tables
|
||
-- AFTER FIX: ~62 utf8mb4 tables
|
||
SELECT
|
||
CASE
|
||
WHEN TABLE_COLLATION LIKE 'latin1%' THEN 'latin1'
|
||
WHEN TABLE_COLLATION LIKE 'utf8mb4%' THEN 'utf8mb4'
|
||
WHEN TABLE_COLLATION LIKE 'utf8mb3%' THEN 'utf8mb3'
|
||
ELSE TABLE_COLLATION
|
||
END as charset,
|
||
COUNT(*) as table_count
|
||
FROM information_schema.TABLES
|
||
WHERE TABLE_SCHEMA='cmc'
|
||
GROUP BY charset
|
||
ORDER BY table_count DESC;
|
||
|
||
-- 3. CHECK SPECIFIC TABLES (Key business tables)
|
||
SELECT TABLE_NAME, TABLE_COLLATION, ENGINE
|
||
FROM information_schema.TABLES
|
||
WHERE TABLE_SCHEMA='cmc'
|
||
AND TABLE_NAME IN (
|
||
'principles', 'customers', 'addresses', 'contacts',
|
||
'products', 'quotes', 'purchase_orders', 'invoices'
|
||
)
|
||
ORDER BY TABLE_NAME;
|
||
|
||
-- 4. CHECK ALL COLUMN CHARSETS FOR 'principles' TABLE
|
||
-- All text columns should show utf8mb4 after fix
|
||
SELECT
|
||
COLUMN_NAME,
|
||
DATA_TYPE,
|
||
CHARACTER_SET_NAME,
|
||
COLLATION_NAME
|
||
FROM information_schema.COLUMNS
|
||
WHERE TABLE_SCHEMA='cmc'
|
||
AND TABLE_NAME='principles'
|
||
AND DATA_TYPE IN ('varchar', 'text', 'char')
|
||
ORDER BY ORDINAL_POSITION;
|
||
|
||
-- 5. TEST INSERT WITH SPECIAL CHARACTERS
|
||
-- This will be rolled back - just testing if charset accepts them
|
||
START TRANSACTION;
|
||
|
||
-- Insert test record with various special characters
|
||
INSERT INTO principles (
|
||
name, code, address, city, state, postcode,
|
||
country_id, currency_id, short_name, url
|
||
)
|
||
VALUES (
|
||
'Test Müller & Söhne GmbH', -- German umlauts (ü, ö)
|
||
999999,
|
||
'Straße 123, 2. Stock', -- German ß
|
||
'München', -- German ü
|
||
'Bayern',
|
||
'80331',
|
||
1, 1, 'TEST', 'http://test.com'
|
||
);
|
||
|
||
-- Read it back - should show properly if charset is correct
|
||
SELECT id, name, address, city
|
||
FROM principles
|
||
WHERE code = 999999;
|
||
|
||
-- Expected output if FIXED:
|
||
-- name: Test Müller & Söhne GmbH (proper umlauts)
|
||
-- address: Straße 123, 2. Stock
|
||
-- city: München
|
||
|
||
-- Expected output if NOT FIXED:
|
||
-- Characters will be corrupted or insert will fail
|
||
|
||
ROLLBACK; -- Don't actually save the test record
|
||
|
||
-- 6. VIEW KNOWN CORRUPTED RECORDS
|
||
-- These show what the corruption looks like before manual repair
|
||
|
||
-- Principle ID 9 - Czech characters corrupted
|
||
SELECT id, name, address, city
|
||
FROM principles
|
||
WHERE id = 9;
|
||
-- BEFORE FIX: BezruÄova 2901, Rožnov pod RadhoÅ¡tÄ›m
|
||
-- AFTER FIX (still needs manual repair): Same corruption
|
||
-- AFTER MANUAL REPAIR: Bezručova 2901, Rožnov pod Radhoštěm
|
||
|
||
-- Principle ID 18 - German umlaut in city
|
||
SELECT id, short_name, city, country_id
|
||
FROM principles
|
||
WHERE id = 18;
|
||
|
||
-- Customer corrupted names
|
||
SELECT id, name
|
||
FROM customers
|
||
WHERE id IN (253, 1006, 1387, 1608, 2174);
|
||
-- Look for: ’ (should be '), É (should be É), “ (should be ")
|
||
|
||
-- Address corrupted data
|
||
SELECT id, address, city
|
||
FROM addresses
|
||
WHERE id IN (19, 25);
|
||
-- Look for: â€" (should be –)
|
||
|
||
-- 7. COUNT RECORDS WITH NON-ASCII CHARACTERS
|
||
-- Shows how many records might need manual repair after charset fix
|
||
SELECT
|
||
'principles' as table_name,
|
||
COUNT(*) as records_with_non_ascii
|
||
FROM principles
|
||
WHERE name REGEXP '[^ -~]'
|
||
OR address REGEXP '[^ -~]'
|
||
OR city REGEXP '[^ -~]'
|
||
|
||
UNION ALL
|
||
|
||
SELECT 'customers', COUNT(*)
|
||
FROM customers
|
||
WHERE name REGEXP '[^ -~]'
|
||
|
||
UNION ALL
|
||
|
||
SELECT 'addresses', COUNT(*)
|
||
FROM addresses
|
||
WHERE address REGEXP '[^ -~]'
|
||
OR city REGEXP '[^ -~]'
|
||
|
||
UNION ALL
|
||
|
||
SELECT 'contacts', COUNT(*)
|
||
FROM contacts
|
||
WHERE firstname REGEXP '[^ -~]'
|
||
OR lastname REGEXP '[^ -~]';
|
||
|
||
-- 8. VERIFY CONNECTION CHARACTER SET
|
||
-- Your client should also be using utf8mb4
|
||
SHOW VARIABLES LIKE 'character_set_%';
|
||
SHOW VARIABLES LIKE 'collation_%';
|
||
|
||
-- Should see:
|
||
-- character_set_client: utf8mb4
|
||
-- character_set_connection: utf8mb4
|
||
-- character_set_results: utf8mb4
|
||
-- character_set_database: utf8mb4
|
||
|
||
-- ============================================================================
|
||
-- AFTER CONVERSION - TEST NEW INSERTS
|
||
-- ============================================================================
|
||
-- After running convert_charset_to_utf8mb4.sql, try these tests:
|
||
|
||
-- Test 1: Insert with German characters
|
||
START TRANSACTION;
|
||
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
|
||
VALUES ('Müller GmbH', 999991, 'Hauptstraße 1', 'München', 'BY', '80331', 1, 1, 'MUL', 'https://test.de');
|
||
SELECT * FROM principles WHERE code = 999991;
|
||
-- Should show: Müller, Hauptstraße, München (correctly)
|
||
ROLLBACK;
|
||
|
||
-- Test 2: Insert with French characters
|
||
START TRANSACTION;
|
||
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
|
||
VALUES ('Société François', 999992, 'Rue de la Paix', 'Orléans', 'FR', '45000', 1, 1, 'SOC', 'https://test.fr');
|
||
SELECT * FROM principles WHERE code = 999992;
|
||
-- Should show: Société, François, Orléans (correctly)
|
||
ROLLBACK;
|
||
|
||
-- Test 3: Insert with special punctuation
|
||
START TRANSACTION;
|
||
INSERT INTO customers (name, trading_name, abn, created, notes, discount_pricing_policies, payment_terms, customer_category_id, url, country_id)
|
||
VALUES ('O'Connor & Sons — "Quality" Products', 'O'Connor Trading', '', NOW(), '', '', '', 1, '', 1);
|
||
SELECT name FROM customers WHERE name LIKE '%O''Connor%' ORDER BY id DESC LIMIT 1;
|
||
-- Should show: O'Connor & Sons — "Quality" Products (with proper apostrophes, em-dash, and quotes)
|
||
ROLLBACK;
|
||
|
||
-- ============================================================================
|
||
-- SUMMARY OF WHAT TO LOOK FOR
|
||
-- ============================================================================
|
||
|
||
/*
|
||
BEFORE FIX:
|
||
- Table charset: latin1_swedish_ci
|
||
- Column charset: latin1
|
||
- Test insert: May fail or corrupt characters
|
||
- Existing data: Shows mojibake (É, ’, etc.)
|
||
|
||
AFTER FIX (charset conversion only):
|
||
- Table charset: utf8mb4_general_ci
|
||
- Column charset: utf8mb4
|
||
- Test insert: Succeeds with proper characters
|
||
- Existing data: STILL shows mojibake (needs manual repair)
|
||
|
||
AFTER MANUAL REPAIR:
|
||
- Everything displays correctly
|
||
- New inserts work properly
|
||
- Old data has been corrected
|
||
*/
|