Caching Strategies - Performance Optimization

Overview

Caching stores frequently accessed data in faster storage to reduce latency and improve system performance.

Cache Levels

Browser Cache

// HTTP response headers instructing the browser how long to cache this
// resource and how to revalidate it once stale.
const headers = {
    'Cache-Control': 'public, max-age=3600',  // 1 hour; 'public' allows shared caches to store it
    'ETag': '"abc123"',  // validator token: browser revalidates via If-None-Match
    'Last-Modified': 'Wed, 21 Oct 2015 07:28:00 GMT'  // fallback validator (If-Modified-Since)
};

CDN Cache

// CDN edge-cache configuration (illustrative — key names are provider-specific).
const cdnConfig = {
    static_assets: {
        cache_duration: '1 year',  // long TTL assumes versioned/fingerprinted filenames — confirm
        compression: 'gzip'
    },
    dynamic_content: {
        cache_duration: '5 minutes',  // short TTL keeps dynamic responses reasonably fresh
        cache_key: ['url', 'user_type']  // vary the cached copy per URL and user type
    }
};

Application Cache

# In-memory cache
class ApplicationCache:
    """In-memory cache with per-entry TTL and LRU eviction.

    Fixes over the original:
    - ``get()`` now unwraps the stored value and honors ``expires_at``
      (the original returned the internal ``{'value', 'expires_at'}``
      wrapper and never checked expiry).
    - ``evict_lru()`` was called but never defined; it is implemented
      here using an ``OrderedDict`` to track access recency.
    """

    def __init__(self, max_size=1000):
        # key -> {'value', 'expires_at'}; oldest (least recently used) first.
        self.cache = OrderedDict()
        self.max_size = max_size

    def get(self, key):
        """Return the cached value, or None if absent or expired."""
        entry = self.cache.get(key)
        if entry is None:
            return None
        if entry['expires_at'] <= time.time():
            # Lazily drop expired entries on access.
            del self.cache[key]
            return None
        self.cache.move_to_end(key)  # mark as most recently used
        return entry['value']

    def set(self, key, value, ttl=300):
        """Store ``value`` under ``key`` for ``ttl`` seconds, evicting LRU if full."""
        if key not in self.cache and len(self.cache) >= self.max_size:
            self.evict_lru()
        self.cache[key] = {
            'value': value,
            'expires_at': time.time() + ttl
        }
        self.cache.move_to_end(key)

    def evict_lru(self):
        """Remove the least recently used entry (front of the OrderedDict)."""
        if self.cache:
            self.cache.popitem(last=False)

Database Cache

# Redis cache
import redis

class RedisCache:
    """Redis-backed cache that stores values as JSON strings."""

    def __init__(self):
        # Connect to the default local Redis instance.
        self.redis = redis.Redis(host='localhost', port=6379, db=0)

    def get(self, key):
        """Return the deserialized value for ``key``, or None on a miss."""
        raw = self.redis.get(key)
        if not raw:
            return None
        return json.loads(raw)

    def set(self, key, value, ttl=3600):
        """Serialize ``value`` to JSON and store it with a ``ttl``-second expiry."""
        payload = json.dumps(value)
        self.redis.setex(key, ttl, payload)

Cache Patterns

Cache-Aside (Lazy Loading)

def get_user(user_id):
    """Cache-aside read: serve from cache, fall back to the database on a miss."""
    cache_key = f"user:{user_id}"

    cached = cache.get(cache_key)
    if cached:
        return cached

    # Miss: load from the source of truth and populate the cache for next time.
    record = database.get_user(user_id)
    if record:
        cache.set(cache_key, record, ttl=3600)
    return record

def update_user(user_id, data):
    """Cache-aside write: persist first, then drop the now-stale cache entry."""
    database.update_user(user_id, data)
    # Invalidate rather than update — the next read repopulates the cache.
    cache_key = f"user:{user_id}"
    cache.delete(cache_key)

Write-Through

def update_user_write_through(user_id, data):
    """Write-through: persist to the database, then refresh the cached copy."""
    database.update_user(user_id, data)

    # Re-read so the cache reflects exactly what the database stored.
    fresh = database.get_user(user_id)
    cache.set(f"user:{user_id}", fresh, ttl=3600)

Write-Behind (Write-Back)

class WriteBehindCache:
    """Write-back cache: writes land in memory first and are persisted later."""

    def __init__(self):
        self.cache = {}
        self.dirty_keys = set()  # keys written since the last flush

    def set(self, key, value):
        """Record the write in memory and mark the key as pending persistence."""
        self.cache[key] = value
        self.dirty_keys.add(key)

    def flush_to_database(self):
        """Persist every dirty entry to the database, then reset the dirty set."""
        pending = [k for k in self.dirty_keys if k in self.cache]
        for k in pending:
            database.update(k, self.cache[k])
        self.dirty_keys.clear()

Refresh-Ahead

class RefreshAheadCache:
    """Serves cached values and refreshes entries in the background before expiry."""

    def get(self, key):
        """Return the cached value; trigger an async refresh if the TTL is low."""
        result = cache.get(key)
        if self.should_refresh(key):
            self.async_refresh(key)
        return result

    def should_refresh(self, key):
        # Refresh once the remaining TTL drops below the threshold.
        # NOTE(review): self.refresh_threshold is expected to be set elsewhere — confirm.
        remaining = cache.ttl(key)
        return remaining < self.refresh_threshold

    def async_refresh(self, key):
        # Run the reload off the request path in a daemon-less worker thread.
        # NOTE(review): self.refresh_key is expected to be defined elsewhere — confirm.
        worker = threading.Thread(target=self.refresh_key, args=[key])
        worker.start()

Cache Invalidation

TTL (Time-To-Live)

def set_with_ttl(key, value, seconds=3600):
    """Store ``value`` under ``key`` and let the cache expire it after ``seconds``."""
    cache.setex(key, seconds, value)

Event-Based Invalidation

class EventDrivenCache:
    """Invalidates user cache entries in response to domain events.

    Fix: the original read ``self.event_bus`` in ``__init__`` without ever
    assigning it, so construction always raised ``AttributeError``. The bus
    is now injected via the constructor; passing None skips subscription.
    """

    def __init__(self, event_bus=None):
        self.event_bus = event_bus
        if self.event_bus is not None:
            # Invalidate cached user data whenever a user record changes.
            self.event_bus.subscribe('user.updated', self.invalidate_user_cache)

    def invalidate_user_cache(self, event):
        """Drop all cache entries derived from the updated user's record."""
        user_id = event.data['user_id']
        cache.delete(f"user:{user_id}")
        cache.delete(f"user:profile:{user_id}")

Tag-Based Invalidation

class TagBasedCache:
    """Cache wrapper that indexes keys by tag so groups can be invalidated at once."""

    def set(self, key, value, tags=None):
        """Store ``value`` and register ``key`` under each tag's member set."""
        cache.set(key, value)
        for tag in tags or ():
            cache.sadd(f"tag:{tag}", key)

    def invalidate_by_tag(self, tag):
        """Delete every key registered under ``tag``, then the tag index itself."""
        tag_key = f"tag:{tag}"
        members = cache.smembers(tag_key)
        if members:
            cache.delete(*members)
            cache.delete(tag_key)

Distributed Caching

Redis Cluster

from rediscluster import RedisCluster

class DistributedCache:
    """Client for a sharded Redis Cluster deployment (three seed nodes)."""

    def __init__(self):
        # Seed hosts; the cluster client discovers the remaining topology itself.
        seed_hosts = ["redis-1", "redis-2", "redis-3"]
        startup = [{"host": h, "port": "7000"} for h in seed_hosts]
        self.redis = RedisCluster(
            startup_nodes=startup,
            decode_responses=True  # return str instead of bytes
        )

Memcached

import memcache

class MemcachedClient:
    """Thin wrapper around a single local Memcached instance."""

    def __init__(self):
        self.mc = memcache.Client(['127.0.0.1:11211'], debug=0)

    def get(self, key):
        """Fetch the value for ``key``; a miss yields None."""
        return self.mc.get(key)

    def set(self, key, value, expiration=3600):
        """Store ``value`` with an ``expiration``-second TTL; returns the client's status."""
        return self.mc.set(key, value, time=expiration)

Cache Eviction Policies

LRU (Least Recently Used)

from collections import OrderedDict

class LRUCache:
    """Fixed-capacity cache evicting the least recently used entry."""

    def __init__(self, capacity):
        self.capacity = capacity
        # OrderedDict insertion order doubles as recency order (oldest first).
        self.cache = OrderedDict()

    def get(self, key):
        """Return the value for ``key`` (marking it most recent), or None."""
        if key not in self.cache:
            return None
        self.cache.move_to_end(key)
        return self.cache[key]

    def set(self, key, value):
        """Insert or refresh ``key``; evict the oldest entry when over capacity."""
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        while len(self.cache) > self.capacity:
            self.cache.popitem(last=False)  # drop least recently used

LFU (Least Frequently Used)

class LFUCache:
    """Fixed-capacity cache evicting the least frequently used entry.

    Fix: the original relied on ``collections.defaultdict``, which is never
    imported in this file (only ``OrderedDict`` is); a plain dict with an
    explicit default removes that dependency. An empty-frequencies guard
    also prevents ``min()`` raising on a zero-capacity cache.
    """

    def __init__(self, capacity):
        self.capacity = capacity
        self.cache = {}
        self.frequencies = {}  # key -> access count (both reads and writes)

    def get(self, key):
        """Return the value for ``key`` and bump its frequency, or None on a miss."""
        if key not in self.cache:
            return None
        self.frequencies[key] += 1
        return self.cache[key]

    def set(self, key, value):
        """Insert or update ``key``; evict the least-frequent entry when at capacity."""
        if key not in self.cache and len(self.cache) >= self.capacity:
            if self.frequencies:  # guard: nothing to evict if capacity is 0
                lfu_key = min(self.frequencies, key=self.frequencies.get)
                del self.cache[lfu_key]
                del self.frequencies[lfu_key]
        self.cache[key] = value
        self.frequencies[key] = self.frequencies.get(key, 0) + 1

Cache Warming

Proactive Cache Warming

class CacheWarmer:
    """Pre-populates the cache so the first real requests avoid a cold miss."""

    def warm_user_cache(self, user_ids):
        """Load each listed user from the database into the cache (1h TTL)."""
        for uid in user_ids:
            record = database.get_user(uid)
            cache.set(f"user:{uid}", record, ttl=3600)

    def warm_popular_content(self):
        """Cache the items analytics reports as popular (2h TTL)."""
        for item in analytics.get_popular_items():
            cache.set(f"item:{item.id}", item, ttl=7200)

Performance Monitoring

Cache Metrics

class CacheMetrics:
    """Aggregates cache health indicators for monitoring dashboards."""

    def get_metrics(self):
        """Return a snapshot dict of hit/miss rates and resource usage.

        Fix: guards against ZeroDivisionError when no requests have been
        recorded yet — hit and miss rates default to 0.0 in that case.
        """
        total = self.cache_hits + self.cache_misses
        return {
            'hit_rate': self.cache_hits / total if total else 0.0,
            'miss_rate': self.cache_misses / total if total else 0.0,
            'eviction_rate': self.evictions_per_second,
            'memory_usage': self.get_memory_usage(),
            'response_time': self.avg_response_time,
        }

Best Practices

Cache Design

  • Set appropriate TTL values
  • Use cache keys consistently
  • Monitor hit rates
  • Plan for cache failures
  • Implement cache warming

Common Pitfalls

# Anti-pattern: Cache stampede
# Deliberately broken teaching example — when the cached entry expires,
# every concurrent caller misses at once and all of them run the expensive
# query simultaneously. Kept as-is to illustrate the pitfall; see the
# locked variant below for the fix.
def get_popular_data():
    data = cache.get('popular_data')
    if not data:
        # Multiple threads may execute this
        data = expensive_database_operation()
        cache.set('popular_data', data)
    return data

# Better: Use locking
def get_popular_data_safe():
    """Cache-aside read guarded by a lock to prevent a cache stampede."""
    data = cache.get('popular_data')
    if data:
        return data
    with cache_lock('popular_data'):
        # Double-checked: another thread may have filled it while we waited.
        data = cache.get('popular_data')
        if not data:
            data = expensive_database_operation()
            cache.set('popular_data', data)
    return data

Next Steps

  1. 📚 Learn about Message Queues
  2. 🎯 Practice cache implementation
  3. 🏗️ Explore cache architectures
  4. 💻 Setup cache monitoring

Content will be expanded with advanced caching patterns.