Global Scale System Design

Tổng Quan

Global Scale Design là các kỹ thuật và patterns để xây dựng hệ thống phục vụ users trên toàn thế giới với low latency, high availability và consistency.

Multi-Region Architecture

Global Distribution Strategy

class GlobalDistribution:
    """Route users to the geographically closest region.

    ``regions`` maps a region name to a ``{'lat', 'lng'}`` mapping holding the
    region's coordinates in decimal degrees.
    """

    # Mean Earth radius used by the haversine formula, in kilometres.
    EARTH_RADIUS_KM = 6371.0

    def __init__(self):
        self.regions = {
            'us-east-1': {'lat': 39.0458, 'lng': -76.6413},
            'eu-west-1': {'lat': 53.4084, 'lng': -8.2439},
            'ap-southeast-1': {'lat': 1.3521, 'lng': 103.8198},
            'ap-northeast-1': {'lat': 35.6762, 'lng': 139.6503}
        }

    def calculate_distance(self, origin, destination):
        """Return the great-circle distance in kilometres between two points.

        Args:
            origin: Mapping with ``'lat'`` and ``'lng'`` keys in decimal
                degrees (same shape as the values of ``self.regions``).
            destination: Mapping with the same shape as ``origin``.

        Returns:
            The haversine distance in kilometres.
        """
        # Imported locally so the class does not depend on a module-level
        # import block.
        import math

        lat_a = math.radians(origin['lat'])
        lat_b = math.radians(destination['lat'])
        half_dlat = (lat_b - lat_a) / 2
        half_dlng = math.radians(destination['lng'] - origin['lng']) / 2

        a = (math.sin(half_dlat) ** 2
             + math.cos(lat_a) * math.cos(lat_b) * math.sin(half_dlng) ** 2)
        # Clamp to 1.0: rounding on near-antipodal points can push ``a``
        # marginally above the domain of asin/sqrt.
        return 2 * self.EARTH_RADIUS_KM * math.asin(math.sqrt(min(1.0, a)))

    def find_closest_region(self, user_location):
        """Return the name of the region nearest to ``user_location``.

        Args:
            user_location: Mapping with ``'lat'`` and ``'lng'`` keys in
                decimal degrees.

        Returns:
            The region name with the smallest distance, or ``None`` when no
            regions are configured. Ties resolve to the first region in
            ``self.regions`` iteration order.
        """
        return min(
            self.regions,
            key=lambda name: self.calculate_distance(
                user_location, self.regions[name]
            ),
            default=None,
        )

    def route_traffic(self, user_request):
        """Return the endpoint of the region closest to the requesting user.

        Reads ``user_request.location`` and passes the chosen region name to
        ``get_regional_endpoint``, which is not defined in this class and has
        to be supplied by a subclass or mixin.
        """
        closest_region = self.find_closest_region(user_request.location)
        return self.get_regional_endpoint(closest_region)

Data Replication Strategies

class GlobalDataReplication:
    """Dispatch a write to one of several named replication strategies.

    ``write_to_primary``, ``get_secondary_regions`` and ``async_replicate``
    are not defined in this class and have to be supplied by a subclass or
    mixin.
    """

    def __init__(self):
        # Bound methods are looked up on ``self`` so subclass overrides of a
        # strategy are picked up automatically.
        self.replication_strategies = {
            'master_slave': self.master_slave_replication,
            'master_master': self.master_master_replication,
            'eventual_consistency': self.eventual_consistency_replication
        }

    def replicate_data(self, data, strategy='eventual_consistency'):
        """Replicate ``data`` using the strategy registered under ``strategy``.

        Raises:
            KeyError: If ``strategy`` is not a registered strategy name.
        """
        return self.replication_strategies[strategy](data)

    def master_slave_replication(self, data):
        """Placeholder for the ``master_slave`` strategy.

        Defined so the strategy table can be built in ``__init__``; without
        it, constructing the class raised ``AttributeError``.
        """
        raise NotImplementedError(
            'master_slave replication is not implemented'
        )

    def master_master_replication(self, data):
        """Placeholder for the ``master_master`` strategy.

        Defined so the strategy table can be built in ``__init__``; without
        it, constructing the class raised ``AttributeError``.
        """
        raise NotImplementedError(
            'master_master replication is not implemented'
        )

    def eventual_consistency_replication(self, data):
        """Write to the primary, then fan out to every secondary region.

        Returns:
            Whatever ``write_to_primary`` returned for ``data``.
        """
        primary_region = self.write_to_primary(data)

        # Secondary regions are handed the data one by one via
        # ``async_replicate``; its return value is ignored.
        for region in self.get_secondary_regions():
            self.async_replicate(data, region)

        return primary_region

Content Delivery Networks

CDN Architecture

class GlobalCDN:
    """Edge-cache placement and content serving for a CDN.

    The helper methods this class calls on ``self`` (server discovery,
    content classification, cache placement, origin fetch) are not defined
    here and have to be supplied by a subclass or mixin.
    """

    def __init__(self):
        self.edge_servers = self.discover_edge_servers()
        self.origin_servers = self.get_origin_servers()

    def cache_content(self, content_id, content_data):
        """Place content in the cache tier that matches its classification.

        Static content goes to every edge, popular content to the regional
        hubs, and everything else is cached on demand. Popularity is only
        checked for content that is not static.
        """
        if self.is_static_content(content_data):
            self.cache_at_all_edges(content_id, content_data)
            return

        if self.is_popular_content(content_id):
            self.cache_at_regional_hubs(content_id, content_data)
            return

        self.cache_on_demand(content_id, content_data)

    def serve_content(self, user_location, content_id):
        """Serve ``content_id`` from the edge server nearest the user.

        On a cache hit the edge's own ``serve_content`` result is returned.
        On a miss the content is fetched from the origin, stored on that
        edge, and the fetched content is returned.
        """
        edge = self.find_closest_edge_server(user_location)

        if not self.is_cached(edge, content_id):
            fetched = self.fetch_from_origin(content_id)
            edge.cache_content(content_id, fetched)
            return fetched

        return edge.serve_content(content_id)

Smart Routing

DNS-based routing:
- Geolocation routing
- Latency-based routing
- Health check based routing

Anycast routing:
- Same IP announced from multiple locations
- BGP routing to closest server
- Automatic failover

Cross-Region Consistency

CAP Theorem Considerations

class DistributedConsistency:
    """Write data at a selectable consistency level.

    ``eventual_consistency_write`` and ``weak_consistency_write`` are not
    defined in this class and have to be supplied by a subclass or mixin.
    """

    def __init__(self, nodes=None):
        """Create the writer.

        Args:
            nodes: Optional iterable of node objects exposing
                ``write(key, value)``. Defaults to no nodes. Previously
                ``self.nodes`` was never set, so a strong write raised
                ``AttributeError``.
        """
        self.consistency_level = 'eventual'  # strong, eventual, weak
        self.nodes = list(nodes) if nodes is not None else []

    def write_data(self, key, value, consistency_level=None):
        """Write ``key``/``value`` at the requested consistency level.

        Args:
            key: Key to write.
            value: Value to write.
            consistency_level: ``'strong'``, ``'eventual'`` or anything else
                (treated as weak). A falsy value falls back to
                ``self.consistency_level``.

        Returns:
            The result of the selected write method.
        """
        level = consistency_level or self.consistency_level

        if level == 'strong':
            return self.strong_consistency_write(key, value)
        elif level == 'eventual':
            return self.eventual_consistency_write(key, value)
        else:
            return self.weak_consistency_write(key, value)

    def strong_consistency_write(self, key, value):
        """Write to nodes until a strict majority has acknowledged.

        Returns:
            ``True`` as soon as more than half of ``self.nodes`` reported a
            successful write (remaining nodes are not written by this call);
            ``False`` if the majority was never reached, including when
            there are no nodes.
        """
        write_count = 0
        required_writes = len(self.nodes) // 2 + 1

        for node in self.nodes:
            if node.write(key, value):
                write_count += 1
                if write_count >= required_writes:
                    return True

        return False  # Failed to achieve consistency

Conflict Resolution

class ConflictResolution:
    """Strategies for picking a winner among conflicting replicas."""

    def resolve_conflicts(self, conflicting_values):
        """Last Write Wins: return the value with the greatest ``timestamp``.

        When several values share the greatest timestamp, the first one
        encountered is returned.
        """
        def written_at(candidate):
            return candidate.timestamp

        return max(conflicting_values, key=written_at)

    def vector_clock_resolution(self, values_with_clocks):
        """Return the last element of the causal ordering of the values.

        The ordering itself comes from ``sort_by_causality``, which is not
        defined in this class.
        """
        ordered = self.sort_by_causality(values_with_clocks)
        latest = ordered[-1]
        return latest

    def application_specific_resolution(self, conflicts, merge_function):
        """Delegate resolution to caller-supplied business logic.

        Returns whatever ``merge_function(conflicts)`` returns.
        """
        merged = merge_function(conflicts)
        return merged

Performance Optimization

Global Load Balancing

class GlobalLoadBalancer:
    """Pick the best healthy region for each request by a weighted score.

    ``get_healthy_regions``, ``calculate_distance`` and ``forward_to_region``
    are not defined in this class and have to be supplied by a subclass or
    mixin.
    """

    # Floor applied to distance and latency before inversion, so a user
    # co-located with a region (distance 0) or a zero latency reading cannot
    # raise ZeroDivisionError.
    _MIN_DENOMINATOR = 1e-9

    def __init__(self):
        self.health_checkers = {}
        self.performance_metrics = {}

    def route_request(self, request):
        """Forward ``request`` to the healthy region with the highest score.

        Raises:
            ValueError: If there are no healthy regions to choose from.
            KeyError: If a healthy region has no entry in
                ``self.performance_metrics``.
        """
        available_regions = self.get_healthy_regions()

        # Calculate routing score for each region
        scores = {
            region: self.calculate_routing_score(
                request.user_location,
                region,
                self.performance_metrics[region]
            )
            for region in available_regions
        }

        if not scores:
            # Same exception type max() raised on an empty mapping, but with
            # a message that names the actual problem.
            raise ValueError('No healthy regions available to route request')

        # Route to best scoring region
        best_region = max(scores, key=scores.get)
        return self.forward_to_region(request, best_region)

    def calculate_routing_score(self, user_location, region, metrics):
        """Combine proximity, latency and spare capacity into one score.

        Args:
            user_location: Passed through to ``calculate_distance``.
            region: Candidate region, passed through to
                ``calculate_distance``.
            metrics: Object exposing ``avg_latency`` and ``cpu_utilization``.

        Returns:
            ``0.4 / distance + 0.4 / avg_latency + 0.2 * (1 - cpu)``, with
            distance and latency floored at ``_MIN_DENOMINATOR``.
        """
        distance = max(
            self.calculate_distance(user_location, region),
            self._MIN_DENOMINATOR,
        )
        latency = max(metrics.avg_latency, self._MIN_DENOMINATOR)

        distance_score = 1 / distance
        latency_score = 1 / latency
        capacity_score = 1 - metrics.cpu_utilization

        return distance_score * 0.4 + latency_score * 0.4 + capacity_score * 0.2

Caching Strategies

class MultiTierCaching:
    """Read-through lookup across browser, CDN, application and DB caches.

    ``populate_upstream_caches``, ``fetch_from_source`` and
    ``populate_all_caches`` are not defined in this class and have to be
    supplied by a subclass or mixin.
    """

    def __init__(self):
        self.browser_cache = BrowserCache()
        self.cdn_cache = CDNCache()
        self.application_cache = ApplicationCache()
        self.database_cache = DatabaseCache()

    def get_data(self, key):
        """Return the value for ``key``, consulting each cache tier in order.

        A tier counts as a hit when its ``get`` returns anything other than
        ``None``, so legitimately cached falsy values (``0``, ``''``, ``[]``,
        ``False``) are served from cache instead of being refetched.

        Returns:
            The cached value from the first tier that has it, otherwise the
            value fetched from the source (which is then written to all
            caches).
        """
        tiers = (
            self.browser_cache,
            self.cdn_cache,
            self.application_cache,
            self.database_cache,
        )

        for cache in tiers:
            data = cache.get(key)
            if data is not None:
                # Update higher-level caches
                self.populate_upstream_caches(key, data)
                return data

        # Cache miss: fetch from source
        data = self.fetch_from_source(key)
        self.populate_all_caches(key, data)
        return data

Network Optimization

Edge Computing

class EdgeComputing:
    """Deploy functions to edge nodes and run requests close to the user.

    ``discover_edge_nodes``, ``meets_criteria``, ``find_closest_edge`` and
    ``execute_in_cloud`` are not defined in this class and have to be
    supplied by a subclass or mixin.
    """

    def __init__(self):
        self.edge_nodes = self.discover_edge_nodes()
        self.workload_scheduler = WorkloadScheduler()

    def deploy_to_edge(self, function_code, deployment_criteria):
        """Deploy ``function_code`` to every edge node that meets the criteria.

        All nodes are evaluated first; deployment starts only after the full
        list of matching nodes has been built.
        """
        targets = [
            node
            for node in self.edge_nodes
            if self.meets_criteria(node, deployment_criteria)
        ]

        for node in targets:
            node.deploy_function(function_code)

    def execute_at_edge(self, user_request, function_name):
        """Run ``function_name`` on the nearest edge, or in the cloud.

        The edge closest to ``user_request.location`` is used when it has the
        function; otherwise execution falls back to ``execute_in_cloud``.
        """
        nearest = self.find_closest_edge(user_request.location)

        if not nearest.has_function(function_name):
            return self.execute_in_cloud(function_name, user_request)

        return nearest.execute(function_name, user_request)

Bandwidth Optimization

class BandwidthOptimization:
    """Reduce bytes on the wire via compression, deltas and bitrate tiers.

    ``compress``, ``select_protocol``, ``has_similar_cached_data``,
    ``create_delta``, ``transfer_delta`` and ``transfer_full_data`` are not
    defined in this class and have to be supplied by a subclass or mixin.
    """

    def optimize_data_transfer(self, data, target_location):
        """Send ``data`` as a delta when possible, else as a compressed blob.

        The protocol is chosen from the target location and the length of
        the compressed payload, whichever transfer path is taken afterwards.
        """
        payload = self.compress(data)
        chosen_protocol = self.select_protocol(target_location, len(payload))

        if not self.has_similar_cached_data(data):
            return self.transfer_full_data(payload, chosen_protocol)

        # Similar data already cached: only the difference is transferred.
        return self.transfer_delta(self.create_delta(data), chosen_protocol)

    def adaptive_bitrate(self, content, user_bandwidth):
        """Return the quality variant of ``content`` for ``user_bandwidth``.

        Bandwidth strictly above 5000 selects ``high_quality``, strictly
        above 1000 selects ``medium_quality``, anything else
        ``low_quality``. Only the selected attribute is read from
        ``content``.
        """
        tiers = (
            (5000, 'high_quality'),    # 5 Mbps
            (1000, 'medium_quality'),  # 1 Mbps
        )

        for floor, variant in tiers:
            if user_bandwidth > floor:
                return getattr(content, variant)

        return getattr(content, 'low_quality')

Monitoring & Observability

Global Monitoring

class GlobalMonitoring:
    """Aggregate per-region monitor readings into one global summary."""

    def __init__(self):
        self.regional_monitors = {}
        self.global_dashboard = GlobalDashboard()

    def collect_global_metrics(self):
        """Collect metrics from every regional monitor and aggregate them.

        Each monitor's ``get_metrics()`` result is read through its
        ``'requests'``, ``'latency'`` and ``'errors'`` keys.

        Returns:
            A dict with:
              * ``'total_requests'``: sum of regional ``'requests'``.
              * ``'avg_latency'``: unweighted mean of regional ``'latency'``.
              * ``'error_rate'``: unweighted mean of regional ``'errors'``.
              * ``'regional_breakdown'``: region name -> raw regional metrics.
            With no monitors registered, the three aggregates stay ``0`` and
            the breakdown is empty.
        """
        global_metrics = {
            'total_requests': 0,
            'avg_latency': 0,
            'error_rate': 0,
            'regional_breakdown': {}
        }

        for region, monitor in self.regional_monitors.items():
            regional_metrics = monitor.get_metrics()
            global_metrics['regional_breakdown'][region] = regional_metrics

            # Aggregate global metrics
            global_metrics['total_requests'] += regional_metrics['requests']
            global_metrics['avg_latency'] += regional_metrics['latency']
            global_metrics['error_rate'] += regional_metrics['errors']

        # Per-region mean (every region counts equally, regardless of its
        # request volume). Skipped when there are no regions to avoid
        # dividing by zero.
        num_regions = len(self.regional_monitors)
        if num_regions:
            global_metrics['avg_latency'] /= num_regions
            global_metrics['error_rate'] /= num_regions

        return global_metrics

Cost Optimization

Regional Cost Analysis

class GlobalCostOptimizer:
    """Compare regions by cost and latency for a given workload.

    ``load_regional_cost_data``, ``analyze_usage_patterns``,
    ``calculate_total_cost``, ``estimate_latency`` and
    ``find_pareto_optimal_regions`` are not defined in this class and have
    to be supplied by a subclass or mixin.
    """

    def __init__(self):
        self.regional_costs = self.load_regional_cost_data()
        self.usage_patterns = self.analyze_usage_patterns()

    def optimize_resource_placement(self, workload_requirements):
        """Build a per-region cost/latency table and pick the best regions.

        NOTE(review): this iterates ``self.regions``, which ``__init__`` does
        not set; it has to be provided elsewhere (subclass, mixin or
        attribute assignment) before this method is called.

        Args:
            workload_requirements: Passed to ``calculate_total_cost``; its
                ``user_base`` attribute is passed to ``estimate_latency``.

        Returns:
            The result of ``find_pareto_optimal_regions`` applied to a dict
            mapping each region to ``{'cost', 'latency', 'cost_per_ms'}``.
        """
        cost_analysis = {}

        for region in self.regions:
            cost = self.calculate_total_cost(region, workload_requirements)
            latency = self.estimate_latency(region, workload_requirements.user_base)

            cost_analysis[region] = {
                'cost': cost,
                'latency': latency,
                # A zero latency estimate would raise ZeroDivisionError;
                # report the ratio as infinite instead.
                'cost_per_ms': cost / latency if latency else float('inf')
            }

        # Find optimal balance between cost and performance
        return self.find_pareto_optimal_regions(cost_analysis)

Disaster Recovery

Global Failover

class GlobalDisasterRecovery:
    """Fail traffic over from a failed region to a backup region.

    ``select_backup_region``, ``restore_data_to_backup_region``,
    ``scale_up_resources``, ``update_dns_records`` and
    ``send_failover_notification`` are not defined in this class and have to
    be supplied by a subclass or mixin.
    """

    def __init__(self):
        self.primary_regions = ['us-east-1', 'eu-west-1']
        self.backup_regions = ['us-west-2', 'ap-southeast-1']

    def initiate_failover(self, failed_region):
        """Move traffic from ``failed_region`` to a prepared backup region.

        The backup region is restored and scaled up *before* DNS is switched
        over, so redirected users never reach a region that is still missing
        data or capacity. The operations team is notified last.

        Args:
            failed_region: Name of the region that went down.
        """
        # Identify backup region
        backup_region = self.select_backup_region(failed_region)

        # Restore data from backups
        self.restore_data_to_backup_region(backup_region)

        # Scale up backup region
        self.scale_up_resources(backup_region)

        # Redirect traffic only once the backup region is ready to serve
        self.update_dns_records(failed_region, backup_region)

        # Notify operations team
        self.send_failover_notification(failed_region, backup_region)

Next Steps

Nội dung này sẽ được mở rộng thêm với:
- Cross-region data synchronization patterns
- Global state management strategies
- International compliance considerations
- Multi-cloud global deployment
- 5G và edge computing integration