-- ============================================================================ -- Manual MariaDB Character Encoding Verification Commands -- ============================================================================ -- Run these commands directly in MariaDB to verify the charset fix -- -- Connect to database: -- docker exec -it cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc -- ============================================================================ -- 1. CHECK DATABASE-LEVEL SETTINGS -- Should show: utf8mb4 SELECT @@character_set_database as db_charset, @@collation_database as db_collation; -- 2. COUNT TABLES BY CHARACTER SET -- BEFORE FIX: ~59 latin1 tables -- AFTER FIX: ~62 utf8mb4 tables SELECT CASE WHEN TABLE_COLLATION LIKE 'latin1%' THEN 'latin1' WHEN TABLE_COLLATION LIKE 'utf8mb4%' THEN 'utf8mb4' WHEN TABLE_COLLATION LIKE 'utf8mb3%' THEN 'utf8mb3' ELSE TABLE_COLLATION END as charset, COUNT(*) as table_count FROM information_schema.TABLES WHERE TABLE_SCHEMA='cmc' GROUP BY charset ORDER BY table_count DESC; -- 3. CHECK SPECIFIC TABLES (Key business tables) SELECT TABLE_NAME, TABLE_COLLATION, ENGINE FROM information_schema.TABLES WHERE TABLE_SCHEMA='cmc' AND TABLE_NAME IN ( 'principles', 'customers', 'addresses', 'contacts', 'products', 'quotes', 'purchase_orders', 'invoices' ) ORDER BY TABLE_NAME; -- 4. CHECK ALL COLUMN CHARSETS FOR 'principles' TABLE -- All text columns should show utf8mb4 after fix SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_SET_NAME, COLLATION_NAME FROM information_schema.COLUMNS WHERE TABLE_SCHEMA='cmc' AND TABLE_NAME='principles' AND DATA_TYPE IN ('varchar', 'text', 'char') ORDER BY ORDINAL_POSITION; -- 5. TEST INSERT WITH SPECIAL CHARACTERS -- This will be rolled back - just testing if charset accepts them START TRANSACTION; -- Insert test record with various special characters INSERT INTO principles ( name, code, address, city, state, postcode, country_id, currency_id, short_name, url ) VALUES ( 'Test Müller & Söhne GmbH', -- German umlauts (ü, ö) 999999, 'Straße 123, 2. Stock', -- German ß 'München', -- German ü 'Bayern', '80331', 1, 1, 'TEST', 'http://test.com' ); -- Read it back - should show properly if charset is correct SELECT id, name, address, city FROM principles WHERE code = 999999; -- Expected output if FIXED: -- name: Test Müller & Söhne GmbH (proper umlauts) -- address: Straße 123, 2. Stock -- city: München -- Expected output if NOT FIXED: -- Characters will be corrupted or insert will fail ROLLBACK; -- Don't actually save the test record -- 6. VIEW KNOWN CORRUPTED RECORDS -- These show what the corruption looks like before manual repair -- Principle ID 9 - Czech characters corrupted SELECT id, name, address, city FROM principles WHERE id = 9; -- BEFORE FIX: BezruÄova 2901, Rožnov pod RadhoÅ¡tÄ›m -- AFTER FIX (still needs manual repair): Same corruption -- AFTER MANUAL REPAIR: Bezručova 2901, Rožnov pod Radhoštěm -- Principle ID 18 - German umlaut in city SELECT id, short_name, city, country_id FROM principles WHERE id = 18; -- Customer corrupted names SELECT id, name FROM customers WHERE id IN (253, 1006, 1387, 1608, 2174); -- Look for: ’ (should be '), É (should be É), “ (should be ") -- Address corrupted data SELECT id, address, city FROM addresses WHERE id IN (19, 25); -- Look for: â€" (should be –) -- 7. COUNT RECORDS WITH NON-ASCII CHARACTERS -- Shows how many records might need manual repair after charset fix SELECT 'principles' as table_name, COUNT(*) as records_with_non_ascii FROM principles WHERE name REGEXP '[^ -~]' OR address REGEXP '[^ -~]' OR city REGEXP '[^ -~]' UNION ALL SELECT 'customers', COUNT(*) FROM customers WHERE name REGEXP '[^ -~]' UNION ALL SELECT 'addresses', COUNT(*) FROM addresses WHERE address REGEXP '[^ -~]' OR city REGEXP '[^ -~]' UNION ALL SELECT 'contacts', COUNT(*) FROM contacts WHERE firstname REGEXP '[^ -~]' OR lastname REGEXP '[^ -~]'; -- 8. VERIFY CONNECTION CHARACTER SET -- Your client should also be using utf8mb4 SHOW VARIABLES LIKE 'character_set_%'; SHOW VARIABLES LIKE 'collation_%'; -- Should see: -- character_set_client: utf8mb4 -- character_set_connection: utf8mb4 -- character_set_results: utf8mb4 -- character_set_database: utf8mb4 -- ============================================================================ -- AFTER CONVERSION - TEST NEW INSERTS -- ============================================================================ -- After running convert_charset_to_utf8mb4.sql, try these tests: -- Test 1: Insert with German characters START TRANSACTION; INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url) VALUES ('Müller GmbH', 999991, 'Hauptstraße 1', 'München', 'BY', '80331', 1, 1, 'MUL', 'https://test.de'); SELECT * FROM principles WHERE code = 999991; -- Should show: Müller, Hauptstraße, München (correctly) ROLLBACK; -- Test 2: Insert with French characters START TRANSACTION; INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url) VALUES ('Société François', 999992, 'Rue de la Paix', 'Orléans', 'FR', '45000', 1, 1, 'SOC', 'https://test.fr'); SELECT * FROM principles WHERE code = 999992; -- Should show: Société, François, Orléans (correctly) ROLLBACK; -- Test 3: Insert with special punctuation START TRANSACTION; INSERT INTO customers (name, trading_name, abn, created, notes, discount_pricing_policies, payment_terms, customer_category_id, url, country_id) VALUES ('O'Connor & Sons — "Quality" Products', 'O'Connor Trading', '', NOW(), '', '', '', 1, '', 1); SELECT name FROM customers WHERE name LIKE '%O''Connor%' ORDER BY id DESC LIMIT 1; -- Should show: O'Connor & Sons — "Quality" Products (with proper apostrophes, em-dash, and quotes) ROLLBACK; -- ============================================================================ -- SUMMARY OF WHAT TO LOOK FOR -- ============================================================================ /* BEFORE FIX: - Table charset: latin1_swedish_ci - Column charset: latin1 - Test insert: May fail or corrupt characters - Existing data: Shows mojibake (É, ’, etc.) AFTER FIX (charset conversion only): - Table charset: utf8mb4_general_ci - Column charset: utf8mb4 - Test insert: Succeeds with proper characters - Existing data: STILL shows mojibake (needs manual repair) AFTER MANUAL REPAIR: - Everything displays correctly - New inserts work properly - Old data has been corrected */