cmc-sales/scripts/verify_charset_manual.sql
2025-12-10 07:27:36 +11:00

208 lines
6.6 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- ============================================================================
-- Manual MariaDB Character Encoding Verification Commands
-- ============================================================================
-- Run these commands directly in MariaDB to verify the charset fix
--
-- Connect to database:
-- docker exec -it cmc-db mariadb -u root -psecureRootPassword --default-character-set=utf8mb4 cmc
-- ============================================================================
-- 1. CHECK DATABASE-LEVEL SETTINGS
-- Should show: utf8mb4
SELECT @@character_set_database as db_charset,
@@collation_database as db_collation;
-- 2. COUNT TABLES BY CHARACTER SET
-- BEFORE FIX: ~59 latin1 tables
-- AFTER FIX: ~62 utf8mb4 tables
SELECT
CASE
WHEN TABLE_COLLATION LIKE 'latin1%' THEN 'latin1'
WHEN TABLE_COLLATION LIKE 'utf8mb4%' THEN 'utf8mb4'
WHEN TABLE_COLLATION LIKE 'utf8mb3%' THEN 'utf8mb3'
ELSE TABLE_COLLATION
END as charset,
COUNT(*) as table_count
FROM information_schema.TABLES
WHERE TABLE_SCHEMA='cmc'
GROUP BY charset
ORDER BY table_count DESC;
-- 3. CHECK SPECIFIC TABLES (Key business tables)
SELECT TABLE_NAME, TABLE_COLLATION, ENGINE
FROM information_schema.TABLES
WHERE TABLE_SCHEMA='cmc'
AND TABLE_NAME IN (
'principles', 'customers', 'addresses', 'contacts',
'products', 'quotes', 'purchase_orders', 'invoices'
)
ORDER BY TABLE_NAME;
-- 4. CHECK ALL COLUMN CHARSETS FOR 'principles' TABLE
-- All text columns should show utf8mb4 after fix
SELECT
COLUMN_NAME,
DATA_TYPE,
CHARACTER_SET_NAME,
COLLATION_NAME
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA='cmc'
AND TABLE_NAME='principles'
AND DATA_TYPE IN ('varchar', 'text', 'char')
ORDER BY ORDINAL_POSITION;
-- 5. TEST INSERT WITH SPECIAL CHARACTERS
-- This will be rolled back - just testing if charset accepts them
START TRANSACTION;
-- Insert test record with various special characters
INSERT INTO principles (
name, code, address, city, state, postcode,
country_id, currency_id, short_name, url
)
VALUES (
'Test Müller & Söhne GmbH', -- German umlauts (ü, ö)
999999,
'Straße 123, 2. Stock', -- German ß
'München', -- German ü
'Bayern',
'80331',
1, 1, 'TEST', 'http://test.com'
);
-- Read it back - should show properly if charset is correct
SELECT id, name, address, city
FROM principles
WHERE code = 999999;
-- Expected output if FIXED:
-- name: Test Müller & Söhne GmbH (proper umlauts)
-- address: Straße 123, 2. Stock
-- city: München
-- Expected output if NOT FIXED:
-- Characters will be corrupted or insert will fail
ROLLBACK; -- Don't actually save the test record
-- 6. VIEW KNOWN CORRUPTED RECORDS
-- These show what the corruption looks like before manual repair
-- Principle ID 9 - Czech characters corrupted
SELECT id, name, address, city
FROM principles
WHERE id = 9;
-- BEFORE FIX: BezruÄova 2901, Rožnov pod RadhoÅ¡tÄm
-- AFTER FIX (still needs manual repair): Same corruption
-- AFTER MANUAL REPAIR: Bezručova 2901, Rožnov pod Radhoštěm
-- Principle ID 18 - German umlaut in city
SELECT id, short_name, city, country_id
FROM principles
WHERE id = 18;
-- Customer corrupted names
SELECT id, name
FROM customers
WHERE id IN (253, 1006, 1387, 1608, 2174);
-- Look for: ’ (should be '), É (should be É), “ (should be ")
-- Address corrupted data
SELECT id, address, city
FROM addresses
WHERE id IN (19, 25);
-- Look for: â€" (should be )
-- 7. COUNT RECORDS WITH NON-ASCII CHARACTERS
-- Shows how many records might need manual repair after charset fix
SELECT
'principles' as table_name,
COUNT(*) as records_with_non_ascii
FROM principles
WHERE name REGEXP '[^ -~]'
OR address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT 'customers', COUNT(*)
FROM customers
WHERE name REGEXP '[^ -~]'
UNION ALL
SELECT 'addresses', COUNT(*)
FROM addresses
WHERE address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT 'contacts', COUNT(*)
FROM contacts
WHERE firstname REGEXP '[^ -~]'
OR lastname REGEXP '[^ -~]';
-- 8. VERIFY CONNECTION CHARACTER SET
-- Your client should also be using utf8mb4
SHOW VARIABLES LIKE 'character_set_%';
SHOW VARIABLES LIKE 'collation_%';
-- Should see:
-- character_set_client: utf8mb4
-- character_set_connection: utf8mb4
-- character_set_results: utf8mb4
-- character_set_database: utf8mb4
-- ============================================================================
-- AFTER CONVERSION - TEST NEW INSERTS
-- ============================================================================
-- After running convert_charset_to_utf8mb4.sql, try these tests:
-- Test 1: Insert with German characters
START TRANSACTION;
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
VALUES ('Müller GmbH', 999991, 'Hauptstraße 1', 'München', 'BY', '80331', 1, 1, 'MUL', 'https://test.de');
SELECT * FROM principles WHERE code = 999991;
-- Should show: Müller, Hauptstraße, München (correctly)
ROLLBACK;
-- Test 2: Insert with French characters
START TRANSACTION;
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
VALUES ('Société François', 999992, 'Rue de la Paix', 'Orléans', 'FR', '45000', 1, 1, 'SOC', 'https://test.fr');
SELECT * FROM principles WHERE code = 999992;
-- Should show: Société, François, Orléans (correctly)
ROLLBACK;
-- Test 3: Insert with special punctuation
START TRANSACTION;
INSERT INTO customers (name, trading_name, abn, created, notes, discount_pricing_policies, payment_terms, customer_category_id, url, country_id)
VALUES ('O'Connor & Sons "Quality" Products', 'O'Connor Trading', '', NOW(), '', '', '', 1, '', 1);
SELECT name FROM customers WHERE name LIKE '%O''Connor%' ORDER BY id DESC LIMIT 1;
-- Should show: O'Connor & Sons — "Quality" Products (with proper apostrophes, em-dash, and quotes)
ROLLBACK;
-- ============================================================================
-- SUMMARY OF WHAT TO LOOK FOR
-- ============================================================================
/*
BEFORE FIX:
- Table charset: latin1_swedish_ci
- Column charset: latin1
- Test insert: May fail or corrupt characters
- Existing data: Shows mojibake (É, ’, etc.)
AFTER FIX (charset conversion only):
- Table charset: utf8mb4_general_ci
- Column charset: utf8mb4
- Test insert: Succeeds with proper characters
- Existing data: STILL shows mojibake (needs manual repair)
AFTER MANUAL REPAIR:
- Everything displays correctly
- New inserts work properly
- Old data has been corrected
*/