MySQL - Tính Khả Dụng & Khả Năng Mở Rộng
🏗️ High Availability (HA) Architectures
1. 🔄 Replication Solutions
Master-Slave Replication
-- On Master: create a dedicated replication account.
-- NOTE(review): '%' accepts connections from any host — restrict to the
-- replica subnet in production.
CREATE USER 'repl'@'%' IDENTIFIED BY 'password';
GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%';

-- Quiesce writes so the binlog coordinates below stay consistent with
-- the data snapshot.
FLUSH TABLES WITH READ LOCK;
SHOW MASTER STATUS;  -- record File and Position for CHANGE MASTER TO
-- Take the data snapshot (mysqldump / xtrabackup) HERE, then release
-- the lock.  The original snippet never released it, so the global
-- read lock persisted for the whole session and blocked all writes.
UNLOCK TABLES;

-- On Slave: point the replica at the recorded coordinates.
-- (MySQL 8.0.23+ prefers CHANGE REPLICATION SOURCE TO / START REPLICA;
-- the legacy syntax below still works.)
CHANGE MASTER TO
MASTER_HOST='master_host',
MASTER_USER='repl',
MASTER_PASSWORD='password',
MASTER_LOG_FILE='mysql-bin.000001',
MASTER_LOG_POS=4;
START SLAVE;
Đặc điểm: - Availability: ~99.9% - RTO: 30 seconds - 2 minutes - RPO: < 1 second với semi-synchronous replication (replication mặc định là async nên có thể mất transaction khi failover) - Use case: Read scaling, backup
Master-Master Replication
# Server 1 (my.cnf) — interleave auto-increment values so concurrent
# inserts on the two masters can never produce the same key.
# NOTE: MySQL option files use '#' (or ';') comments; the original
# snippet used SQL-style '--', which is a parse error in my.cnf.
server-id = 1
auto_increment_increment = 2
auto_increment_offset = 1
# Server 2 (my.cnf) — generates 2, 4, 6, ... while server 1
# generates 1, 3, 5, ...
server-id = 2
auto_increment_increment = 2
auto_increment_offset = 2
Đặc điểm: - Availability: ~99.95% - RTO: Immediate failover - RPO: Near zero - Risk: Split-brain scenarios
2. 🎯 MySQL Cluster (NDB)
Architecture
SQL Nodes (mysqld) ← Application
↓
Data Nodes (ndbd) ← Storage
↓
Management Nodes (ndb_mgmd) ← Control
Configuration:
# Defaults applied to every data node.
[ndbd default]
# Each table fragment is stored on 2 data nodes -> survives one node loss.
NoOfReplicas= 2
# Demo-sized memory limits; size these to the working set in production.
DataMemory= 80M
# NOTE(review): IndexMemory is deprecated/removed in newer NDB releases
# (index memory comes out of DataMemory) — confirm against your version.
IndexMemory= 18M
# Data (storage) nodes.
[ndbd]
hostname= ndb1.example.com
datadir= /var/lib/mysql-cluster
[ndbd]
hostname= ndb2.example.com
datadir= /var/lib/mysql-cluster
# SQL nodes (mysqld API front-ends the application connects to).
[mysqld]
hostname= sql1.example.com
[mysqld]
hostname= sql2.example.com
Đặc điểm: - Availability: 99.999% - RTO: Milliseconds - RPO: Zero (synchronous) - Scalability: Horizontal scaling
3. 🌊 Semi-Synchronous Replication
-- Install plugin (once per server; installation itself persists).
-- NOTE(review): in MySQL 8.0.26+ the plugins/variables were renamed to
-- rpl_semi_sync_source / rpl_semi_sync_replica — confirm your version.
INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';
-- Enable semi-sync.
-- SET GLOBAL does NOT survive a restart: also set these in my.cnf or
-- use SET PERSIST (8.0+).  On commit the master waits for one replica
-- ACK up to rpl_semi_sync_master_timeout (default 10000 ms), then
-- silently degrades to asynchronous replication.
SET GLOBAL rpl_semi_sync_master_enabled = 1;
SET GLOBAL rpl_semi_sync_slave_enabled = 1;
Benefits: - Data safety: Độ bền dữ liệu tốt hơn async (ít nhất một replica đã nhận transaction trước khi commit trả về) - Performance: Good balance - Timeout: Tự động rơi về async khi quá timeout, nên durability không được đảm bảo tuyệt đối
📈 Scalability Strategies
1. 🔍 Read Scaling
Read Replicas Pattern
import random
from sqlalchemy import create_engine
class DatabaseRouter:
    """Route writes to the master engine and reads to a random replica.

    Uniform random selection spreads read load statistically; it does
    not account for replica health or replication lag.
    """

    def __init__(self):
        # Single write endpoint.
        self.master = create_engine('mysql://master:3306/db')
        # Pool of read-only replicas.
        self.slaves = [
            create_engine('mysql://slave1:3306/db'),
            create_engine('mysql://slave2:3306/db'),
            create_engine('mysql://slave3:3306/db'),
        ]

    def get_read_connection(self):
        """Return one replica engine, chosen uniformly at random."""
        return random.choice(self.slaves)

    def get_write_connection(self):
        """Return the master engine; all writes go here."""
        return self.master
Metrics: - Read QPS: 10x improvement possible - Slave lag: Monitor replication delay - Connection pooling: Essential for performance
Connection Pooling
from sqlalchemy import create_engine
from sqlalchemy.pool import QueuePool
# Pooled engine: reuse up to pool_size server connections instead of
# paying a TCP + auth handshake on every request.
engine = create_engine(
'mysql://user:pass@host/db',
poolclass=QueuePool,
pool_size=20,  # steady-state connections held open
max_overflow=0,  # hard cap: never burst past pool_size (callers wait)
pool_pre_ping=True,  # liveness-check a connection before handing it out
pool_recycle=3600  # recycle hourly; stay under MySQL's wait_timeout
)
2. ✂️ Sharding (Horizontal Partitioning)
Range-Based Sharding
class RangeShardingRouter:
    """Map an integer user id onto a named shard via inclusive id ranges.

    Routing is a linear scan — O(number of shards) — which is fine for a
    handful of shards; switch to a sorted list + bisect if the shard
    count grows large.
    """

    # Default layout: three shards of one million ids each,
    # with inclusive [min, max] bounds.
    DEFAULT_SHARDS = {
        'shard1': {'min': 0, 'max': 1000000},
        'shard2': {'min': 1000001, 'max': 2000000},
        'shard3': {'min': 2000001, 'max': 3000000},
    }

    def __init__(self, shards=None):
        # Generalized: accept a custom shard map so other topologies can
        # reuse the router.  Calling with no argument keeps the original
        # hard-coded three-shard behavior (backward compatible).
        self.shards = dict(shards) if shards is not None else dict(self.DEFAULT_SHARDS)

    def get_shard(self, user_id):
        """Return the shard whose [min, max] range contains user_id.

        Raises:
            ValueError: if user_id falls outside every configured range.
        """
        for shard, range_info in self.shards.items():
            if range_info['min'] <= user_id <= range_info['max']:
                return shard
        raise ValueError("User ID out of range")
Hash-Based Sharding
import hashlib
class HashShardingRouter:
    """Distribute keys across a fixed set of shards by hashing.

    MD5 is used purely as a fast, stable hash (not for security): the
    same key always maps to the same shard while num_shards is fixed.
    Changing num_shards remaps almost every key — use consistent
    hashing if live resharding is a requirement.
    """

    def __init__(self, num_shards=4):
        self.num_shards = num_shards
        self.shards = ['shard%d' % idx for idx in range(num_shards)]

    def get_shard(self, key):
        """Return the shard name for ``key`` (any str()-able value)."""
        digest = hashlib.md5(str(key).encode()).hexdigest()
        bucket = int(digest, 16) % self.num_shards
        return self.shards[bucket]
Directory-Based Sharding
-- Lookup table mapping each entity to the shard that stores it.
-- Directory-based sharding pays an extra lookup per request in exchange
-- for the freedom to move entities between shards without rehashing.
CREATE TABLE shard_directory (
entity_id BIGINT PRIMARY KEY,
-- NOT NULL: a directory row without a shard is meaningless and would
-- silently route lookups nowhere (the original column was nullable).
shard_name VARCHAR(50) NOT NULL,
-- Supports "list all entities on shard X" during rebalancing.
INDEX idx_shard (shard_name)
);
3. 📊 Partitioning
Range Partitioning
-- Sales facts partitioned by calendar year: predicates on sale_date
-- prune partitions, and old years are archived with DROP PARTITION
-- instead of a slow DELETE.
CREATE TABLE sales (
id INT,
sale_date DATE,
amount DECIMAL(10,2)  -- exact DECIMAL for money, never FLOAT
) PARTITION BY RANGE (YEAR(sale_date)) (
PARTITION p2020 VALUES LESS THAN (2021),
PARTITION p2021 VALUES LESS THAN (2022),
PARTITION p2022 VALUES LESS THAN (2023),
PARTITION p2023 VALUES LESS THAN (2024),
-- Catch-all: without it MySQL rejects any 2024+ row with
-- "Table has no partition for value".  Split pmax with
-- ALTER TABLE ... REORGANIZE PARTITION when adding new years.
PARTITION pmax VALUES LESS THAN MAXVALUE
);
Hash Partitioning
-- Spread rows evenly across 4 partitions via MOD(id, 4).
-- Hash partitioning speeds up single-key lookups on id; queries that
-- do not filter on id must scan every partition.
CREATE TABLE users (
id INT,
username VARCHAR(50),
email VARCHAR(100)
) PARTITION BY HASH(id) PARTITIONS 4;
List Partitioning
-- Orders grouped by sales region via explicit value lists.
-- NOTE(review): LIST partitioning has no catch-all in MySQL — inserting
-- any region value not enumerated below fails with
-- "Table has no partition for value"; keep the lists exhaustive.
CREATE TABLE orders (
id INT,
region VARCHAR(20),
order_date DATE
) PARTITION BY LIST COLUMNS(region) (
PARTITION p_north VALUES IN ('North', 'Northeast'),
PARTITION p_south VALUES IN ('South', 'Southeast'),
PARTITION p_west VALUES IN ('West', 'Southwest'),
PARTITION p_east VALUES IN ('East')
);
🎯 Load Balancing Strategies
1. 🔄 Application-Level Load Balancing
class MySQLLoadBalancer:
    """Route statements to the master or, round-robin, to a read replica.

    Write statements go to the master; everything else is spread across
    the replica pool in strict round-robin order.
    """

    # Statement verbs that must execute on the master.  The original
    # listed only INSERT/UPDATE/DELETE, so REPLACE and DDL were routed
    # to read-only replicas; it also compared case-sensitively, sending
    # 'insert' to a slave.
    WRITE_STATEMENTS = frozenset({
        'INSERT', 'UPDATE', 'DELETE', 'REPLACE',
        'CREATE', 'ALTER', 'DROP', 'TRUNCATE',
    })

    def __init__(self):
        self.master = 'mysql-master:3306'
        self.slaves = [
            'mysql-slave1:3306',
            'mysql-slave2:3306',
            'mysql-slave3:3306'
        ]
        self.slave_index = 0

    def route_query(self, query_type):
        """Return the host:port a statement of ``query_type`` should run on.

        ``query_type`` is the leading SQL verb (e.g. 'SELECT', 'insert');
        comparison is case-insensitive.
        """
        if query_type.strip().upper() in self.WRITE_STATEMENTS:
            return self.master
        # Round-robin for reads
        slave = self.slaves[self.slave_index]
        self.slave_index = (self.slave_index + 1) % len(self.slaves)
        return slave
2. 🌐 Proxy-Based Load Balancing
ProxySQL Configuration
-- Register backends: hostgroup 0 = writer (master), hostgroup 1 = readers.
INSERT INTO mysql_servers(hostgroup_id, hostname, port) VALUES
(0, '192.168.1.10', 3306), -- Master
(1, '192.168.1.11', 3306), -- Slave 1
(1, '192.168.1.12', 3306); -- Slave 2
-- Query routing rules.  Rules are evaluated in rule_id order and stop
-- at the first match with apply=1, so the FOR UPDATE rule MUST precede
-- the generic SELECT rule: locking reads have to run on the master or
-- the row locks they take are useless.  active=1 is required for a
-- rule to be considered at all.
INSERT INTO mysql_query_rules(rule_id, active, match_pattern, destination_hostgroup, apply) VALUES
(1, 1, '^SELECT.*FOR UPDATE$', 0, 1), -- Locking reads -> master
(2, 1, '^SELECT.*', 1, 1),            -- Plain SELECT -> slaves
(3, 1, '^INSERT.*', 0, 1),            -- Route INSERT to master
(4, 1, '^UPDATE.*', 0, 1),            -- Route UPDATE to master
(5, 1, '^DELETE.*', 0, 1);            -- Route DELETE to master
-- ProxySQL stages configuration in memory: load it to runtime and
-- persist it, otherwise the rows above have no effect.
LOAD MYSQL SERVERS TO RUNTIME; SAVE MYSQL SERVERS TO DISK;
LOAD MYSQL QUERY RULES TO RUNTIME; SAVE MYSQL QUERY RULES TO DISK;
3. 🔧 HAProxy Configuration
global
daemon
# TCP mode: HAProxy balances raw MySQL connections.  It cannot inspect
# queries, so read/write splitting is impossible here — use ProxySQL
# for statement-level routing.
defaults
mode tcp
timeout connect 5000ms
# Long client/server timeouts so idle but valid MySQL sessions are not
# cut mid-transaction.
timeout client 50000ms
timeout server 50000ms
frontend mysql_frontend
bind *:3306
default_backend mysql_backend
backend mysql_backend
balance roundrobin
# Bare 'check' is only a TCP connect test: it proves the port accepts
# connections, not that MySQL can authenticate or serve queries.
# Prefer 'option mysql-check user <u>' for a protocol-aware check.
server mysql1 192.168.1.10:3306 check
# 'backup' servers receive traffic only when every non-backup server is
# down — this is active/passive failover, not read load spreading.
server mysql2 192.168.1.11:3306 check backup
server mysql3 192.168.1.12:3306 check backup
📊 Performance Metrics & Monitoring
1. 🎯 Key Availability Metrics
-- Uptime: seconds since the server last started.
SHOW GLOBAL STATUS LIKE 'Uptime';
-- Connection errors, broken down by cause (refused, aborted, ...).
SHOW GLOBAL STATUS LIKE 'Connection_errors%';
-- Slave lag
SHOW SLAVE STATUS\G
-- Look for Seconds_Behind_Master.
-- NOTE(review): it reads NULL when the SQL/IO thread is stopped and 0
-- when the master is idle, so it can under-report real lag; MySQL
-- 8.0.22+ prefers SHOW REPLICA STATUS.
-- Thread states: what every connection is currently doing.
SHOW PROCESSLIST;
2. 📈 Scalability Metrics
-- Queries per second
SHOW GLOBAL STATUS LIKE 'Questions';
SHOW GLOBAL STATUS LIKE 'Uptime';
-- QPS = Questions / Uptime
-- ('Questions' counts statements sent by clients; the 'Queries' counter
-- additionally includes statements run inside stored programs.)
-- Connection usage: alert well before Threads_connected approaches
-- max_connections, or new clients start getting "Too many connections".
SHOW GLOBAL STATUS LIKE 'Threads_connected';
SHOW GLOBAL VARIABLES LIKE 'max_connections';
-- Buffer pool hit ratio
SHOW GLOBAL STATUS LIKE 'Innodb_buffer_pool_read_requests';
SHOW GLOBAL STATUS LIKE 'Innodb_buffer_pool_reads';
-- Hit ratio = (read_requests - reads) / read_requests * 100
-- ('reads' are requests that missed the pool and went to disk;
-- a healthy OLTP workload targets > 99%.)
3. 🔍 Performance Schema Monitoring
-- Top slow queries by total time.
-- Performance Schema timers are in PICOSECONDS: divide by 1e12 for
-- seconds.  The original divided by 1e9, producing milliseconds while
-- labelling the columns "sec" (off by 1000x).
SELECT
DIGEST_TEXT,
COUNT_STAR,
AVG_TIMER_WAIT/1000000000000 as avg_time_sec,
SUM_TIMER_WAIT/1000000000000 as total_time_sec
FROM performance_schema.events_statements_summary_by_digest
ORDER BY SUM_TIMER_WAIT DESC LIMIT 10;
-- Connection statistics per client host.
-- host_summary is a view in the sys schema (built on Performance
-- Schema data); performance_schema itself has no such table.
SELECT * FROM sys.host_summary;
-- IO statistics per file, heaviest first.
SELECT * FROM performance_schema.file_summary_by_instance
ORDER BY sum_timer_wait DESC LIMIT 10;
🚨 Disaster Recovery
1. 💾 Backup Strategies
Logical Backup
# mysqldump (logical backup).
# --single-transaction: consistent InnoDB snapshot without locking
#   (does NOT protect non-transactional tables such as MyISAM).
# --routines --triggers: include stored programs, skipped by default.
mysqldump --single-transaction --routines --triggers \
--all-databases > backup.sql
# Point-in-time recovery baseline:
# --flush-logs rotates the binlog so recovery replays from a clean file;
# --master-data=2 writes the binlog coordinates as a comment in the dump.
# NOTE(review): renamed --source-data in MySQL 8.0.26+ — confirm version.
mysqldump --single-transaction --flush-logs --master-data=2 \
--all-databases > backup.sql
Physical Backup
# Percona XtraBackup (physical backup): copies InnoDB files while the
# server runs — much faster than mysqldump for large datasets.
xtrabackup --backup --target-dir=/backup/full/
# Incremental backup: copies only pages changed since the base backup
# in --incremental-basedir; restore requires the full backup plus every
# incremental, applied in order with --prepare.
xtrabackup --backup --target-dir=/backup/inc1/ \
--incremental-basedir=/backup/full/
2. 🔄 Failover Procedures
Automatic Failover with MHA
# MHA (Master High Availability) operations.
# Verify replication health and MHA config before enabling the manager.
masterha_check_repl --conf=/etc/mha/app1.cnf
# Monitoring daemon: watches the master and performs automatic failover
# when it dies (typically run under a process supervisor).
masterha_manager --conf=/etc/mha/app1.cnf
# Manual failover: declare the master dead and promote a named slave.
masterha_master_switch --conf=/etc/mha/app1.cnf \
--master_state=dead \
--new_master_host=slave1
Application-Level Failover
import time
import mysql.connector
from mysql.connector import Error
class FailoverManager:
    """Health-check MySQL hosts and repoint the app at a healthy replica.

    NOTE(review): "promotion" here only changes which config the
    application uses; actually promoting a replica (stopping replication,
    clearing read_only) must still be done server-side.
    """

    def __init__(self, master_config, slave_configs):
        # Connection-parameter dicts for mysql.connector.connect().
        self.master_config = master_config
        self.slave_configs = slave_configs
        self.current_master = None

    def check_health(self, config):
        """Return True iff a trivial query succeeds against ``config``.

        A 5-second connect timeout keeps a dead host from hanging the
        failover loop.
        """
        conn = None
        try:
            conn = mysql.connector.connect(**config, connection_timeout=5)
            cursor = conn.cursor()
            cursor.execute("SELECT 1")
            # Drain the result set; an unread result can raise on close
            # with mysql.connector.
            cursor.fetchall()
            return True
        except Error:
            return False
        finally:
            # The original leaked the connection whenever execute()
            # raised; always close if a connection was obtained.
            if conn is not None:
                try:
                    conn.close()
                except Error:
                    pass

    def failover_to_slave(self):
        """Adopt the first healthy replica as master; True on success."""
        for slave_config in self.slave_configs:
            if self.check_health(slave_config):
                # Promote slave to master (application-side only).
                self.current_master = slave_config
                return True
        return False
📋 Best Practices Summary
✅ High Availability
- Multiple replicas: At least 2 slaves for redundancy
- Cross-datacenter: Geographic distribution
- Health monitoring: Continuous health checks
- Automated failover: Minimize downtime
- Regular testing: DR drills and testing
✅ Scalability
- Read replicas: Scale read operations
- Connection pooling: Efficient connection management
- Query optimization: Proper indexing and queries
- Caching layers: Redis/Memcached integration
- Horizontal sharding: When vertical scaling limits reached
✅ Monitoring
- Real-time metrics: QPS, connections, replication lag
- Alerting: Proactive issue detection
- Performance trending: Capacity planning
- Log analysis: Slow query and error logs
- Regular audits: Configuration and performance reviews
Kiến thức về HA và scalability là must-have cho senior database roles.