cmc-sales/scripts/verify_charset_manual.sql

208 lines
6.6 KiB
MySQL
Raw Normal View History

2025-12-09 12:23:22 -08:00
-- ============================================================================
-- Manual MariaDB Character Encoding Verification Commands
-- ============================================================================
-- Run these commands directly in MariaDB to verify the charset fix
--
-- Connect to database:
-- docker exec -it cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc
-- ============================================================================
-- 1. CHECK DATABASE-LEVEL SETTINGS
-- Should show: utf8mb4
SELECT @@character_set_database as db_charset,
@@collation_database as db_collation;
-- 2. COUNT TABLES BY CHARACTER SET
-- BEFORE FIX: ~59 latin1 tables
-- AFTER FIX: ~62 utf8mb4 tables
SELECT
CASE
WHEN TABLE_COLLATION LIKE 'latin1%' THEN 'latin1'
WHEN TABLE_COLLATION LIKE 'utf8mb4%' THEN 'utf8mb4'
WHEN TABLE_COLLATION LIKE 'utf8mb3%' THEN 'utf8mb3'
ELSE TABLE_COLLATION
END as charset,
COUNT(*) as table_count
FROM information_schema.TABLES
WHERE TABLE_SCHEMA='cmc'
GROUP BY charset
ORDER BY table_count DESC;
-- 3. CHECK SPECIFIC TABLES (Key business tables)
SELECT TABLE_NAME, TABLE_COLLATION, ENGINE
FROM information_schema.TABLES
WHERE TABLE_SCHEMA='cmc'
AND TABLE_NAME IN (
'principles', 'customers', 'addresses', 'contacts',
'products', 'quotes', 'purchase_orders', 'invoices'
)
ORDER BY TABLE_NAME;
-- 4. CHECK ALL COLUMN CHARSETS FOR 'principles' TABLE
-- All text columns should show utf8mb4 after fix
SELECT
COLUMN_NAME,
DATA_TYPE,
CHARACTER_SET_NAME,
COLLATION_NAME
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA='cmc'
AND TABLE_NAME='principles'
AND DATA_TYPE IN ('varchar', 'text', 'char')
ORDER BY ORDINAL_POSITION;
-- 5. TEST INSERT WITH SPECIAL CHARACTERS
-- This will be rolled back - just testing if charset accepts them
START TRANSACTION;
-- Insert test record with various special characters
INSERT INTO principles (
name, code, address, city, state, postcode,
country_id, currency_id, short_name, url
)
VALUES (
'Test Müller & Söhne GmbH', -- German umlauts (ü, ö)
999999,
'Straße 123, 2. Stock', -- German ß
'München', -- German ü
'Bayern',
'80331',
1, 1, 'TEST', 'http://test.com'
);
-- Read it back - should show properly if charset is correct
SELECT id, name, address, city
FROM principles
WHERE code = 999999;
-- Expected output if FIXED:
-- name: Test Müller & Söhne GmbH (proper umlauts)
-- address: Straße 123, 2. Stock
-- city: München
-- Expected output if NOT FIXED:
-- Characters will be corrupted or insert will fail
ROLLBACK; -- Don't actually save the test record
-- 6. VIEW KNOWN CORRUPTED RECORDS
-- These show what the corruption looks like before manual repair
-- Principle ID 9 - Czech characters corrupted
SELECT id, name, address, city
FROM principles
WHERE id = 9;
-- BEFORE FIX: BezruÄova 2901, Rožnov pod RadhoÅ¡tÄm
-- AFTER FIX (still needs manual repair): Same corruption
-- AFTER MANUAL REPAIR: Bezručova 2901, Rožnov pod Radhoštěm
-- Principle ID 18 - German umlaut in city
SELECT id, short_name, city, country_id
FROM principles
WHERE id = 18;
-- Customer corrupted names
SELECT id, name
FROM customers
WHERE id IN (253, 1006, 1387, 1608, 2174);
-- Look for: ’ (should be '), É (should be É), “ (should be ")
-- Address corrupted data
SELECT id, address, city
FROM addresses
WHERE id IN (19, 25);
-- Look for: â€" (should be )
-- 7. COUNT RECORDS WITH NON-ASCII CHARACTERS
-- Shows how many records might need manual repair after charset fix
SELECT
'principles' as table_name,
COUNT(*) as records_with_non_ascii
FROM principles
WHERE name REGEXP '[^ -~]'
OR address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT 'customers', COUNT(*)
FROM customers
WHERE name REGEXP '[^ -~]'
UNION ALL
SELECT 'addresses', COUNT(*)
FROM addresses
WHERE address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT 'contacts', COUNT(*)
FROM contacts
WHERE firstname REGEXP '[^ -~]'
OR lastname REGEXP '[^ -~]';
-- 8. VERIFY CONNECTION CHARACTER SET
-- Your client should also be using utf8mb4
SHOW VARIABLES LIKE 'character_set_%';
SHOW VARIABLES LIKE 'collation_%';
-- Should see:
-- character_set_client: utf8mb4
-- character_set_connection: utf8mb4
-- character_set_results: utf8mb4
-- character_set_database: utf8mb4
-- ============================================================================
-- AFTER CONVERSION - TEST NEW INSERTS
-- ============================================================================
-- After running convert_charset_to_utf8mb4.sql, try these tests:
-- Test 1: Insert with German characters
START TRANSACTION;
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
VALUES ('Müller GmbH', 999991, 'Hauptstraße 1', 'München', 'BY', '80331', 1, 1, 'MUL', 'https://test.de');
SELECT * FROM principles WHERE code = 999991;
-- Should show: Müller, Hauptstraße, München (correctly)
ROLLBACK;
-- Test 2: Insert with French characters
START TRANSACTION;
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
VALUES ('Société François', 999992, 'Rue de la Paix', 'Orléans', 'FR', '45000', 1, 1, 'SOC', 'https://test.fr');
SELECT * FROM principles WHERE code = 999992;
-- Should show: Société, François, Orléans (correctly)
ROLLBACK;
-- Test 3: Insert with special punctuation
START TRANSACTION;
INSERT INTO customers (name, trading_name, abn, created, notes, discount_pricing_policies, payment_terms, customer_category_id, url, country_id)
VALUES ('O'Connor & Sons "Quality" Products', 'O'Connor Trading', '', NOW(), '', '', '', 1, '', 1);
SELECT name FROM customers WHERE name LIKE '%O''Connor%' ORDER BY id DESC LIMIT 1;
-- Should show: O'Connor & Sons — "Quality" Products (with proper apostrophes, em-dash, and quotes)
ROLLBACK;
-- ============================================================================
-- SUMMARY OF WHAT TO LOOK FOR
-- ============================================================================
/*
BEFORE FIX:
- Table charset: latin1_swedish_ci
- Column charset: latin1
- Test insert: May fail or corrupt characters
- Existing data: Shows mojibake (Ã, â, etc.)
AFTER FIX (charset conversion only):
- Table charset: utf8mb4_general_ci
- Column charset: utf8mb4
- Test insert: Succeeds with proper characters
- Existing data: STILL shows mojibake (needs manual repair)
AFTER MANUAL REPAIR:
- Everything displays correctly
- New inserts work properly
- Old data has been corrected
*/