Global Scale System Design
Tổng Quan
Global Scale Design là tập hợp các kỹ thuật và pattern dùng để xây dựng hệ thống phục vụ người dùng trên toàn thế giới với độ trễ thấp (low latency), tính sẵn sàng cao (high availability) và tính nhất quán dữ liệu (consistency).
Multi-Region Architecture
Global Distribution Strategy
class GlobalDistribution:
    """Pick the nearest region for a user and resolve its endpoint.

    The class relies on two helpers that are not defined here and are
    expected to be provided elsewhere (e.g. by a subclass):
    ``calculate_distance(user_location, coordinates)`` and
    ``get_regional_endpoint(region)``.
    """

    def __init__(self):
        # Region id -> {'lat': ..., 'lng': ...} coordinates.
        known_regions = (
            ('us-east-1', 39.0458, -76.6413),
            ('eu-west-1', 53.4084, -8.2439),
            ('ap-southeast-1', 1.3521, 103.8198),
            ('ap-northeast-1', 35.6762, 139.6503),
        )
        self.regions = {
            name: {'lat': lat, 'lng': lng} for name, lat, lng in known_regions
        }

    def find_closest_region(self, user_location):
        """Return the id of the region with the smallest distance.

        Regions are scanned in insertion order and only a strictly smaller
        distance replaces the current best, so ties go to the earliest
        region. Returns ``None`` when no region has a distance below
        infinity (including when there are no regions).
        """
        best_name, best_distance = None, float('inf')
        for name, coords in self.regions.items():
            candidate = self.calculate_distance(user_location, coords)
            if candidate < best_distance:
                best_name, best_distance = name, candidate
        return best_name

    def route_traffic(self, user_request):
        """Return the endpoint of the region closest to ``user_request.location``."""
        return self.get_regional_endpoint(
            self.find_closest_region(user_request.location)
        )
Data Replication Strategies
class GlobalDataReplication:
    """Dispatch replication requests to a named strategy.

    Only ``eventual_consistency_replication`` is implemented in this class.
    ``master_slave_replication``, ``master_master_replication``,
    ``write_to_primary``, ``get_secondary_regions`` and ``async_replicate``
    are referenced but not defined here; they are expected to be supplied
    elsewhere (e.g. by a subclass).
    """

    def __init__(self):
        # Strategy name -> bound method implementing it.
        self.replication_strategies = dict(
            master_slave=self.master_slave_replication,
            master_master=self.master_master_replication,
            eventual_consistency=self.eventual_consistency_replication,
        )

    def replicate_data(self, data, strategy='eventual_consistency'):
        """Run the strategy registered under ``strategy`` on ``data``.

        Raises ``KeyError`` if ``strategy`` is not a registered name.
        """
        handler = self.replication_strategies[strategy]
        return handler(data)

    def eventual_consistency_replication(self, data):
        """Write to the primary, then fan out to every secondary region.

        Returns whatever ``write_to_primary`` returned.
        """
        write_result = self.write_to_primary(data)
        # The primary write happens first; secondaries are replicated after.
        for secondary in self.get_secondary_regions():
            self.async_replicate(data, secondary)
        return write_result
Content Delivery Networks
CDN Architecture
class GlobalCDN:
    """Cache placement and edge-first content serving.

    All ``discover_*``, ``get_*``, ``is_*``, ``cache_at_*``,
    ``cache_on_demand``, ``find_closest_edge_server`` and
    ``fetch_from_origin`` helpers are referenced but not defined in this
    class; they are expected to be provided elsewhere.
    """

    def __init__(self):
        self.edge_servers = self.discover_edge_servers()
        self.origin_servers = self.get_origin_servers()

    def cache_content(self, content_id, content_data):
        """Place content according to the first matching rule.

        Static content goes to all edges, otherwise popular content goes to
        regional hubs, otherwise the content is cached on demand.
        """
        if self.is_static_content(content_data):
            self.cache_at_all_edges(content_id, content_data)
            return
        if self.is_popular_content(content_id):
            self.cache_at_regional_hubs(content_id, content_data)
            return
        self.cache_on_demand(content_id, content_data)

    def serve_content(self, user_location, content_id):
        """Serve from the closest edge, filling it from the origin on a miss.

        On a hit the edge's ``serve_content`` result is returned; on a miss
        the content fetched from the origin is stored on the edge and
        returned directly.
        """
        edge = self.find_closest_edge_server(user_location)
        if not self.is_cached(edge, content_id):
            # Cache miss: pull from the origin and populate the edge.
            fetched = self.fetch_from_origin(content_id)
            edge.cache_content(content_id, fetched)
            return fetched
        return edge.serve_content(content_id)
Smart Routing
DNS-based routing:
- Geolocation routing
- Latency-based routing
- Health check based routing
Anycast routing:
- Same IP announced from multiple locations
- BGP routing to closest server
- Automatic failover
Cross-Region Consistency
CAP Theorem Considerations
class DistributedConsistency:
    """Write path with a selectable consistency level.

    ``eventual_consistency_write`` and ``weak_consistency_write`` are
    referenced but not defined in this class; they are expected to be
    provided elsewhere (e.g. by a subclass).
    """

    def __init__(self, nodes=None):
        """Create the writer.

        Args:
            nodes: Optional iterable of node objects exposing
                ``write(key, value)`` with a truthy result on success.
                Defaults to no nodes.
        """
        self.consistency_level = 'eventual'  # strong, eventual, weak
        # Always define ``nodes`` so strong_consistency_write cannot fail
        # with AttributeError on a freshly constructed instance.
        self.nodes = list(nodes) if nodes is not None else []

    def write_data(self, key, value, consistency_level=None):
        """Write ``key``/``value`` at the requested consistency level.

        Args:
            key: Key to write.
            value: Value to write.
            consistency_level: ``'strong'`` or ``'eventual'``; any other
                truthy value selects the weak write path. A falsy value
                falls back to ``self.consistency_level``.

        Returns:
            The result of the selected write method.
        """
        level = consistency_level or self.consistency_level
        if level == 'strong':
            return self.strong_consistency_write(key, value)
        elif level == 'eventual':
            return self.eventual_consistency_write(key, value)
        else:
            return self.weak_consistency_write(key, value)

    def strong_consistency_write(self, key, value):
        """Write to nodes in order until a strict majority has acknowledged.

        Returns:
            ``True`` as soon as more than half of ``self.nodes`` report a
            successful write (remaining nodes are not contacted by this
            call); ``False`` if the majority is never reached, which
            includes the case of an empty node list.
        """
        write_count = 0
        # Strict majority: floor(n / 2) + 1.
        required_writes = len(self.nodes) // 2 + 1
        for node in self.nodes:
            if node.write(key, value):
                write_count += 1
                if write_count >= required_writes:
                    return True
        return False  # Failed to achieve consistency
Conflict Resolution
class ConflictResolution:
    """Strategies for choosing one value among conflicting replicas."""

    def resolve_conflicts(self, conflicting_values):
        """Last Write Wins: return the value with the greatest ``timestamp``.

        When several values share the greatest timestamp, the first one
        encountered is returned.
        """
        def written_at(candidate):
            return candidate.timestamp

        return max(conflicting_values, key=written_at)

    def vector_clock_resolution(self, values_with_clocks):
        """Return the last element of the causally ordered values.

        Ordering is delegated to ``sort_by_causality``, which is not defined
        in this class and is expected to be provided elsewhere.
        """
        ordered = self.sort_by_causality(values_with_clocks)
        latest = ordered[-1]  # last entry of the causal order
        return latest

    def application_specific_resolution(self, conflicts, merge_function):
        """Resolve ``conflicts`` with caller-supplied business logic."""
        merged = merge_function(conflicts)
        return merged
Performance Optimization
Global Load Balancing
class GlobalLoadBalancer:
    """Route each request to the healthy region with the best score.

    ``get_healthy_regions``, ``forward_to_region`` and ``calculate_distance``
    are referenced but not defined in this class; they are expected to be
    provided elsewhere.
    """

    # Floor applied to distance and latency before taking their reciprocal,
    # so a zero measurement cannot raise ZeroDivisionError.
    _MIN_DENOMINATOR = 1e-9

    def __init__(self):
        self.health_checkers = {}
        self.performance_metrics = {}

    def route_request(self, request):
        """Forward ``request`` to the highest-scoring healthy region.

        Raises:
            ValueError: If there is no healthy region to route to.
            KeyError: If a healthy region has no entry in
                ``self.performance_metrics``.
        """
        # Calculate routing score for each region
        scores = {
            region: self.calculate_routing_score(
                request.user_location,
                region,
                self.performance_metrics[region],
            )
            for region in self.get_healthy_regions()
        }
        if not scores:
            # Same exception type max() would raise on an empty mapping,
            # but with a message that names the actual problem.
            raise ValueError('no healthy regions available to route request')
        # Route to best scoring region
        best_region = max(scores, key=scores.get)
        return self.forward_to_region(request, best_region)

    def calculate_routing_score(self, user_location, region, metrics):
        """Combine proximity, latency and spare capacity into one score.

        The score is ``0.4 / distance + 0.4 / avg_latency +
        0.2 * (1 - cpu_utilization)``; higher is better. Distance and
        latency are clamped to a tiny positive floor before inversion so
        that a value of zero yields a very large score instead of an error.
        """
        distance = max(
            self.calculate_distance(user_location, region),
            self._MIN_DENOMINATOR,
        )
        latency = max(metrics.avg_latency, self._MIN_DENOMINATOR)
        distance_score = 1 / distance
        latency_score = 1 / latency
        capacity_score = 1 - metrics.cpu_utilization
        return distance_score * 0.4 + latency_score * 0.4 + capacity_score * 0.2
Caching Strategies
class MultiTierCaching:
    """Look a key up through successive cache tiers before the source.

    ``BrowserCache``, ``CDNCache``, ``ApplicationCache``, ``DatabaseCache``,
    ``populate_upstream_caches``, ``populate_all_caches`` and
    ``fetch_from_source`` are referenced but not defined here; they are
    expected to be provided elsewhere.
    """

    def __init__(self):
        self.browser_cache = BrowserCache()
        self.cdn_cache = CDNCache()
        self.application_cache = ApplicationCache()
        self.database_cache = DatabaseCache()

    def get_data(self, key):
        """Return the value for ``key`` from the first tier that holds it.

        Tiers are consulted in the order browser, CDN, application,
        database. On a hit ``populate_upstream_caches`` is called; on a
        miss in every tier the value is fetched from the source and written
        through ``populate_all_caches``.

        A hit is any result other than ``None``, so cached falsy values
        such as ``0``, ``''`` or ``[]`` are served from cache rather than
        refetched. This assumes each tier's ``get`` returns ``None`` on a
        miss - TODO confirm against the cache implementations.
        """
        tiers = (
            self.browser_cache,
            self.cdn_cache,
            self.application_cache,
            self.database_cache,
        )
        # Check each cache tier
        for cache in tiers:
            data = cache.get(key)
            if data is not None:
                # Update higher-level caches
                self.populate_upstream_caches(key, data)
                return data
        # Cache miss: fetch from source
        data = self.fetch_from_source(key)
        self.populate_all_caches(key, data)
        return data
Network Optimization
Edge Computing
class EdgeComputing:
    """Deploy functions to edge nodes and run them near the user.

    ``discover_edge_nodes``, ``WorkloadScheduler``, ``meets_criteria``,
    ``find_closest_edge`` and ``execute_in_cloud`` are referenced but not
    defined here; they are expected to be provided elsewhere.
    """

    def __init__(self):
        self.edge_nodes = self.discover_edge_nodes()
        self.workload_scheduler = WorkloadScheduler()

    def deploy_to_edge(self, function_code, deployment_criteria):
        """Deploy ``function_code`` to every edge node meeting the criteria.

        All nodes are evaluated first; deployment starts only after the
        full list of matching nodes is known.
        """
        targets = [
            node
            for node in self.edge_nodes
            if self.meets_criteria(node, deployment_criteria)
        ]
        for node in targets:
            node.deploy_function(function_code)

    def execute_at_edge(self, user_request, function_name):
        """Run the function on the closest edge, or in the cloud as fallback."""
        nearest = self.find_closest_edge(user_request.location)
        if not nearest.has_function(function_name):
            # The edge does not host the function: fall back to the cloud.
            return self.execute_in_cloud(function_name, user_request)
        return nearest.execute(function_name, user_request)
Bandwidth Optimization
class BandwidthOptimization:
    """Transfer-size and quality selection helpers.

    ``compress``, ``select_protocol``, ``has_similar_cached_data``,
    ``create_delta``, ``transfer_delta`` and ``transfer_full_data`` are
    referenced but not defined here; they are expected to be provided
    elsewhere.
    """

    def optimize_data_transfer(self, data, target_location):
        """Send ``data`` as a delta when similar data is cached, else in full.

        The payload is compressed and the protocol is chosen from the
        target location and the compressed size before the delta check; the
        full transfer sends the compressed payload, while the delta is
        built from the uncompressed ``data``.
        """
        payload = self.compress(data)
        chosen_protocol = self.select_protocol(target_location, len(payload))
        if not self.has_similar_cached_data(data):
            return self.transfer_full_data(payload, chosen_protocol)
        # Similar data is already cached: ship only the difference.
        return self.transfer_delta(self.create_delta(data), chosen_protocol)

    def adaptive_bitrate(self, content, user_bandwidth):
        """Pick a quality tier of ``content`` from ``user_bandwidth``.

        Above 5000 selects high quality, above 1000 selects medium quality,
        anything else selects low quality.
        """
        if user_bandwidth > 5000:  # 5 Mbps
            return content.high_quality
        if user_bandwidth > 1000:  # 1 Mbps
            return content.medium_quality
        return content.low_quality
Monitoring & Observability
Global Monitoring
class GlobalMonitoring:
    """Aggregate per-region monitor metrics into one global view.

    ``GlobalDashboard`` is referenced but not defined here; it is expected
    to be provided elsewhere.
    """

    def __init__(self):
        # Region id -> monitor object exposing get_metrics().
        self.regional_monitors = {}
        self.global_dashboard = GlobalDashboard()

    def collect_global_metrics(self):
        """Collect metrics from every regional monitor and aggregate them.

        Each monitor's ``get_metrics()`` must return a mapping with the keys
        ``'requests'``, ``'latency'`` and ``'errors'``.

        Returns:
            A dict with ``'total_requests'`` (sum over regions),
            ``'avg_latency'`` and ``'error_rate'`` (plain, unweighted means
            of the per-region values) and ``'regional_breakdown'`` (region
            id -> raw regional metrics). With no registered monitors the
            three numeric fields are ``0`` and the breakdown is empty.
        """
        global_metrics = {
            'total_requests': 0,
            'avg_latency': 0,
            'error_rate': 0,
            'regional_breakdown': {}
        }
        for region, monitor in self.regional_monitors.items():
            regional_metrics = monitor.get_metrics()
            global_metrics['regional_breakdown'][region] = regional_metrics
            # Aggregate global metrics
            global_metrics['total_requests'] += regional_metrics['requests']
            global_metrics['avg_latency'] += regional_metrics['latency']
            global_metrics['error_rate'] += regional_metrics['errors']
        # Average per region (not weighted by request volume). Skipped when
        # there are no regions, which would otherwise divide by zero.
        num_regions = len(self.regional_monitors)
        if num_regions:
            global_metrics['avg_latency'] /= num_regions
            global_metrics['error_rate'] /= num_regions
        return global_metrics
Cost Optimization
Regional Cost Analysis
class GlobalCostOptimizer:
    """Compare regions by cost and estimated latency for a workload.

    ``load_regional_cost_data``, ``analyze_usage_patterns``,
    ``calculate_total_cost``, ``estimate_latency`` and
    ``find_pareto_optimal_regions`` are referenced but not defined here;
    they are expected to be provided elsewhere.
    """

    def __init__(self):
        self.regional_costs = self.load_regional_cost_data()
        self.usage_patterns = self.analyze_usage_patterns()

    def optimize_resource_placement(self, workload_requirements):
        """Build a per-region cost/latency table and pick the best regions.

        For every region the table holds ``'cost'``, ``'latency'`` and
        ``'cost_per_ms'`` (cost divided by latency). The table is handed to
        ``find_pareto_optimal_regions`` and its result is returned.
        """
        # NOTE(review): self.regions is not assigned in __init__ above -
        # confirm where it is expected to come from.
        per_region = {}
        for region_id in self.regions:
            total_cost = self.calculate_total_cost(region_id, workload_requirements)
            expected_latency = self.estimate_latency(
                region_id, workload_requirements.user_base
            )
            per_region[region_id] = dict(
                cost=total_cost,
                latency=expected_latency,
                cost_per_ms=total_cost / expected_latency,
            )
        # Find optimal balance between cost and performance
        return self.find_pareto_optimal_regions(per_region)
Disaster Recovery
Global Failover
class GlobalDisasterRecovery:
    """Fail a region over to a backup region.

    ``select_backup_region``, ``restore_data_to_backup_region``,
    ``scale_up_resources``, ``update_dns_records`` and
    ``send_failover_notification`` are referenced but not defined here; they
    are expected to be provided elsewhere.
    """

    def __init__(self):
        self.primary_regions = ['us-east-1', 'eu-west-1']
        self.backup_regions = ['us-west-2', 'ap-southeast-1']

    def initiate_failover(self, failed_region):
        """Move traffic from ``failed_region`` to a backup region.

        The backup region is prepared (data restored, capacity scaled up)
        before DNS is switched, so redirected traffic does not land on a
        region that has neither the data nor the capacity to serve it.
        The operations team is notified last.
        """
        # Identify backup region
        backup_region = self.select_backup_region(failed_region)
        # Restore data from backups
        self.restore_data_to_backup_region(backup_region)
        # Scale up backup region
        self.scale_up_resources(backup_region)
        # Redirect traffic only once the backup region is ready to serve
        self.update_dns_records(failed_region, backup_region)
        # Notify operations team
        self.send_failover_notification(failed_region, backup_region)
Next Steps
Nội dung này sẽ được mở rộng thêm với các chủ đề sau:
- Cross-region data synchronization patterns
- Global state management strategies
- International compliance considerations
- Multi-cloud global deployment
- Tích hợp 5G và edge computing