Caching Strategies - Performance Optimization
Overview
Caching stores frequently accessed data in faster storage to reduce latency and improve system performance.
Cache Levels
Browser Cache
// HTTP headers for browser caching
const headers = {
  // Any cache (browser or shared proxy/CDN) may store the response for 1 hour.
  'Cache-Control': 'public, max-age=3600', // 1 hour
  // Validators: allow the client to revalidate cheaply (304 Not Modified)
  // instead of re-downloading the full body.
  'ETag': '"abc123"',
  'Last-Modified': 'Wed, 21 Oct 2015 07:28:00 GMT'
};
CDN Cache
// CDN configuration
const cdnConfig = {
  // Static assets can be cached for a long time and served compressed.
  static_assets: {
    cache_duration: '1 year',
    compression: 'gzip'
  },
  // Dynamic responses get a short TTL, keyed by URL and user type so
  // different audiences never share the same cached page.
  dynamic_content: {
    cache_duration: '5 minutes',
    cache_key: ['url', 'user_type']
  }
};
Application Cache
# In-memory cache
class ApplicationCache:
    """In-memory cache with per-entry TTL and LRU eviction.

    Fixes over the original sketch: ``get`` now honors ``expires_at`` and
    returns the stored value (the original returned the internal entry
    dict and never checked expiration), and ``evict_lru`` is implemented
    (the original called an undefined method). Recency is tracked with
    dict insertion order: ``get`` re-inserts the entry to mark it fresh.
    """

    def __init__(self, max_size=1000):
        # key -> {'value': ..., 'expires_at': epoch-seconds}
        self.cache = {}
        self.max_size = max_size

    def get(self, key):
        """Return the cached value for *key*, or None if absent or expired."""
        import time
        entry = self.cache.get(key)
        if entry is None:
            return None
        if entry['expires_at'] <= time.time():
            # Lazily purge expired entries on access.
            del self.cache[key]
            return None
        # Re-insert to move the key to the most-recently-used position.
        del self.cache[key]
        self.cache[key] = entry
        return entry['value']

    def set(self, key, value, ttl=300):
        """Store *value* under *key* for *ttl* seconds, evicting if full."""
        import time
        # Only evict for genuinely new keys; overwriting an existing key
        # does not grow the cache.
        if key not in self.cache and len(self.cache) >= self.max_size:
            self.evict_lru()
        self.cache.pop(key, None)  # keep recency order correct on overwrite
        self.cache[key] = {
            'value': value,
            'expires_at': time.time() + ttl
        }

    def evict_lru(self):
        """Drop the least-recently-used entry (front of insertion order)."""
        if self.cache:
            oldest = next(iter(self.cache))
            del self.cache[oldest]
Database Cache
# Redis cache
import redis
class RedisCache:
    """JSON-serializing cache backed by a local Redis instance."""

    def __init__(self):
        self.redis = redis.Redis(host='localhost', port=6379, db=0)

    def get(self, key):
        """Return the deserialized value for *key*, or None on a miss."""
        raw = self.redis.get(key)
        return json.loads(raw) if raw else None

    def set(self, key, value, ttl=3600):
        """Serialize *value* to JSON and store it with *ttl* seconds to live."""
        self.redis.setex(key, ttl, json.dumps(value))
Cache Patterns
Cache-Aside (Lazy Loading)
def get_user(user_id):
    """Cache-aside read: serve from the cache, falling back to the database."""
    cache_key = f"user:{user_id}"
    cached = cache.get(cache_key)
    if cached:
        return cached
    # Miss: load from the source of truth and populate the cache for next time.
    record = database.get_user(user_id)
    if record:
        cache.set(cache_key, record, ttl=3600)
    return record
def update_user(user_id, data):
    """Cache-aside write: persist first, then drop the stale cache entry."""
    database.update_user(user_id, data)
    # Invalidate rather than update — the next read repopulates the cache.
    cache.delete(f"user:{user_id}")
Write-Through
def update_user_write_through(user_id, data):
    """Write-through: every write lands in both the database and the cache."""
    database.update_user(user_id, data)
    # Re-read so the cached copy reflects exactly what was persisted.
    fresh = database.get_user(user_id)
    cache.set(f"user:{user_id}", fresh, ttl=3600)
Write-Behind (Write-Back)
class WriteBehindCache:
    """Write-behind (write-back) cache: writes hit memory immediately and
    are persisted to the database later, in batches."""

    def __init__(self):
        self.cache = {}
        self.dirty_keys = set()

    def set(self, key, value):
        """Record the write in memory and mark the key as pending persistence."""
        self.cache[key] = value
        self.dirty_keys.add(key)

    def flush_to_database(self):
        """Persist every pending (dirty) entry, then reset the dirty set."""
        for pending in self.dirty_keys:
            if pending in self.cache:
                database.update(pending, self.cache[pending])
        self.dirty_keys.clear()
Refresh-Ahead
class RefreshAheadCache:
    """Refresh-ahead: serve cached values and proactively refresh entries
    that are close to expiring, so hot keys rarely go cold."""

    def get(self, key):
        """Return the cached value, kicking off a background refresh when
        the entry is near expiration."""
        cached = cache.get(key)
        if self.should_refresh(key):
            self.async_refresh(key)
        return cached

    def should_refresh(self, key):
        # Refresh once the remaining TTL drops below the configured threshold.
        return cache.ttl(key) < self.refresh_threshold

    def async_refresh(self, key):
        # Fire-and-forget: refresh on a background thread so the caller
        # is never blocked by the reload.
        worker = threading.Thread(target=self.refresh_key, args=[key])
        worker.start()
Cache Invalidation
TTL (Time-To-Live)
def set_with_ttl(key, value, seconds=3600):
    """Store *value* under *key*, expiring after *seconds* (Redis SETEX)."""
    cache.setex(key, seconds, value)
Event-Based Invalidation
class EventDrivenCache:
    """Invalidate cache entries in response to domain events.

    Fix: the original read ``self.event_bus`` without ever assigning it,
    so ``__init__`` always raised AttributeError. The bus is now injected;
    the default keeps no-argument construction working.
    """

    def __init__(self, event_bus=None):
        self.event_bus = event_bus
        if self.event_bus is not None:
            # React to user updates by dropping the stale cache entries.
            self.event_bus.subscribe('user.updated', self.invalidate_user_cache)

    def invalidate_user_cache(self, event):
        """Drop every cached projection of the updated user."""
        user_id = event.data['user_id']
        cache.delete(f"user:{user_id}")
        cache.delete(f"user:profile:{user_id}")
Tag-Based Invalidation
class TagBasedCache:
    """Cache with tag-based group invalidation: each key may carry tags,
    and every key sharing a tag can be dropped in one call."""

    def set(self, key, value, tags=None):
        """Store *value*, indexing *key* under each tag (a Redis set per tag)."""
        cache.set(key, value)
        for tag in tags or ():
            cache.sadd(f"tag:{tag}", key)

    def invalidate_by_tag(self, tag):
        """Delete every key carrying *tag*, then the tag index itself."""
        members = cache.smembers(f"tag:{tag}")
        if members:
            cache.delete(*members)
        cache.delete(f"tag:{tag}")
Distributed Caching
Redis Cluster
from rediscluster import RedisCluster
class DistributedCache:
    """Thin wrapper around a Redis Cluster client spanning three nodes."""

    def __init__(self):
        # NOTE(review): ports are given as strings; some client versions
        # expect ints — confirm against the installed rediscluster release.
        self.redis = RedisCluster(
            startup_nodes=[
                {"host": "redis-1", "port": "7000"},
                {"host": "redis-2", "port": "7000"},
                {"host": "redis-3", "port": "7000"}
            ],
            # Return str instead of raw bytes from read commands.
            decode_responses=True
        )
Memcached
import memcache
class MemcachedClient:
    """Minimal get/set facade over a single local Memcached instance."""

    def __init__(self):
        self.mc = memcache.Client(['127.0.0.1:11211'], debug=0)

    def get(self, key):
        """Return the cached value for *key*, or None on a miss."""
        return self.mc.get(key)

    def set(self, key, value, expiration=3600):
        """Store *value* under *key*, expiring after *expiration* seconds."""
        return self.mc.set(key, value, time=expiration)
Cache Eviction Policies
LRU (Least Recently Used)
from collections import OrderedDict
class LRUCache:
    """Fixed-capacity cache that evicts the least-recently-used entry.

    An OrderedDict keeps entries in recency order: the front holds the
    stalest key, the back the freshest.
    """

    def __init__(self, capacity):
        self.capacity = capacity
        self.cache = OrderedDict()

    def get(self, key):
        """Return the value for *key* (refreshing its recency), or None."""
        if key not in self.cache:
            return None
        self.cache.move_to_end(key)
        return self.cache[key]

    def set(self, key, value):
        """Insert or update *key*, evicting the stalest entry when over capacity."""
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        while len(self.cache) > self.capacity:
            self.cache.popitem(last=False)  # drop least-recently-used
LFU (Least Frequently Used)
class LFUCache:
    """Fixed-capacity cache that evicts the least-frequently-used entry."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.cache = {}
        self.frequencies = defaultdict(int)

    def get(self, key):
        """Return the value for *key*, counting the access; None on a miss."""
        if key not in self.cache:
            return None
        self.frequencies[key] += 1
        return self.cache[key]

    def set(self, key, value):
        """Insert or update *key*, evicting the coldest entry when full."""
        is_new = key not in self.cache
        if is_new and len(self.cache) >= self.capacity:
            # Drop the entry with the fewest recorded accesses.
            coldest = min(self.frequencies, key=self.frequencies.get)
            del self.cache[coldest]
            del self.frequencies[coldest]
        self.cache[key] = value
        self.frequencies[key] += 1
Cache Warming
Proactive Cache Warming
class CacheWarmer:
    """Pre-populate the cache so the first real requests hit warm entries."""

    def warm_user_cache(self, user_ids):
        """Load each given user from the database into the cache (1h TTL)."""
        for uid in user_ids:
            record = database.get_user(uid)
            cache.set(f"user:{uid}", record, ttl=3600)

    def warm_popular_content(self):
        """Cache the currently popular items with a longer (2h) TTL."""
        for item in analytics.get_popular_items():
            cache.set(f"item:{item.id}", item, ttl=7200)
Performance Monitoring
Cache Metrics
class CacheMetrics:
    """Aggregate cache health indicators for monitoring dashboards."""

    def get_metrics(self):
        """Return a snapshot dict of hit/miss rates, eviction rate,
        memory usage, and average response time.

        Fix: guard against ZeroDivisionError when no lookups have been
        recorded yet — hit and miss rates then default to 0.0.
        """
        lookups = self.cache_hits + self.cache_misses
        return {
            'hit_rate': self.cache_hits / lookups if lookups else 0.0,
            'miss_rate': self.cache_misses / lookups if lookups else 0.0,
            'eviction_rate': self.evictions_per_second,
            'memory_usage': self.get_memory_usage(),
            'response_time': self.avg_response_time
        }
Best Practices
Cache Design
- Set appropriate TTL values
- Use cache keys consistently
- Monitor hit rates
- Plan for cache failures
- Implement cache warming
Common Pitfalls
# Anti-pattern: Cache stampede
def get_popular_data():
    """Anti-pattern demo (cache stampede): on a miss, every concurrent
    caller recomputes because nothing serializes the rebuild."""
    data = cache.get('popular_data')
    if data:
        return data
    # Several threads can reach this point at once and all hit the database.
    data = expensive_database_operation()
    cache.set('popular_data', data)
    return data
# Better: Use locking
def get_popular_data_safe():
    """Stampede-safe read: a lock plus a double-check ensures only one
    caller rebuilds the value on a cache miss."""
    data = cache.get('popular_data')
    if data:
        return data
    with cache_lock('popular_data'):
        # Re-check: another caller may have rebuilt it while we waited.
        data = cache.get('popular_data')
        if not data:
            data = expensive_database_operation()
            cache.set('popular_data', data)
    return data
Next Steps
- 📚 Learn about Message Queues
- 🎯 Practice cache implementation
- 🏗️ Explore cache architectures
- 💻 Setup cache monitoring
This content will be expanded with advanced caching patterns.