cmc-sales/scripts/verify_charset_fix.sh
2025-12-10 07:27:36 +11:00

164 lines
4.2 KiB
Bash
Executable file

#!/bin/bash
# Script to verify character encoding fix in MariaDB
DB_USER="root"
DB_PASS="secureRootPassword"
DB_NAME="cmc"
CONTAINER="cmc-db"
echo "=========================================="
echo "Character Encoding Verification Script"
echo "=========================================="
echo ""
# Function to run MariaDB command
run_sql() {
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS $DB_NAME -e "$1"
}
# Function to run MariaDB command with UTF-8 client
run_sql_utf8() {
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS --default-character-set=utf8mb4 $DB_NAME -e "$1"
}
echo "1. Check database default charset"
echo "-----------------------------------"
docker exec $CONTAINER mariadb -u $DB_USER -p$DB_PASS -e "
SELECT @@character_set_database, @@collation_database;
"
echo ""
echo "2. Count tables by charset (BEFORE should show ~59 latin1)"
echo "------------------------------------------------------------"
run_sql "
SELECT
CASE
WHEN TABLE_COLLATION LIKE 'latin1%' THEN 'latin1'
WHEN TABLE_COLLATION LIKE 'utf8mb4%' THEN 'utf8mb4'
WHEN TABLE_COLLATION LIKE 'utf8mb3%' THEN 'utf8mb3'
ELSE TABLE_COLLATION
END as charset,
COUNT(*) as table_count
FROM information_schema.TABLES
WHERE TABLE_SCHEMA='$DB_NAME'
GROUP BY charset
ORDER BY table_count DESC;
"
echo ""
echo "3. Check specific table charsets (sample)"
echo "------------------------------------------"
run_sql "
SELECT TABLE_NAME, TABLE_COLLATION
FROM information_schema.TABLES
WHERE TABLE_SCHEMA='$DB_NAME'
AND TABLE_NAME IN ('principles', 'customers', 'addresses', 'contacts', 'products')
ORDER BY TABLE_NAME;
"
echo ""
echo "4. Show column charsets for principles table"
echo "---------------------------------------------"
run_sql "
SELECT COLUMN_NAME, CHARACTER_SET_NAME, COLLATION_NAME
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA='$DB_NAME'
AND TABLE_NAME='principles'
AND DATA_TYPE IN ('varchar', 'text', 'char')
ORDER BY ORDINAL_POSITION;
"
echo ""
echo "5. Test INSERT of special characters (will be rolled back)"
echo "-----------------------------------------------------------"
echo "Attempting to insert test record with special characters..."
run_sql_utf8 "
START TRANSACTION;
INSERT INTO principles (name, code, address, city, state, postcode, country_id, currency_id, short_name, url)
VALUES (
'Test Müller & Söhne GmbH', -- German umlauts
999999,
'Straße 123, 2. Stock', -- German ß and ² symbol
'München', -- German ü
'Bayern',
'80331',
1,
1,
'TEST',
'http://test.com'
);
-- Read it back immediately
SELECT id, name, address, city
FROM principles
WHERE code = 999999;
ROLLBACK;
" 2>&1 | grep -A 10 "id\|Test Müller\|ERROR"
echo "✓ Test insert rolled back"
echo ""
echo "6. Verify current corrupted data in principles"
echo "-----------------------------------------------"
echo "These should show corruption if NOT yet fixed:"
run_sql_utf8 "
SELECT id, name, city
FROM principles
WHERE id IN (9, 18);
"
echo ""
echo "7. Verify corrupted data in customers"
echo "--------------------------------------"
run_sql_utf8 "
SELECT id, name
FROM customers
WHERE id IN (253, 1006, 1387, 1608);
"
echo ""
echo "8. Count potentially corrupted records across tables"
echo "-----------------------------------------------------"
run_sql "
SELECT
'principles' as table_name,
COUNT(*) as rows_with_non_ascii
FROM principles
WHERE name REGEXP '[^ -~]'
OR address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT
'customers',
COUNT(*)
FROM customers
WHERE name REGEXP '[^ -~]'
UNION ALL
SELECT
'addresses',
COUNT(*)
FROM addresses
WHERE address REGEXP '[^ -~]'
OR city REGEXP '[^ -~]'
UNION ALL
SELECT
'contacts',
COUNT(*)
FROM contacts
WHERE firstname REGEXP '[^ -~]'
OR lastname REGEXP '[^ -~]';
"
echo ""
echo "=========================================="
echo "Verification complete!"
echo "=========================================="
echo ""
echo "INTERPRETATION:"
echo "- If table_count for 'latin1' is ~59: NOT YET FIXED"
echo "- If table_count for 'utf8mb4' is ~62: FIXED!"
echo "- Test insert should show proper characters if fixed"
echo "- Corrupted data will still show mojibake until manually repaired"
echo ""