fix(pr#2): address CodeRabbit round 3 review findings

- cache: fix LRU race — check expiry before MoveToFront within same lock
  critical section; previously a stale entry could be promoted before
  expiry was verified
- cache: drop stale ETag/Content-Md5/Digest headers when body is
  rewritten by refreshCachedResponseMeta to avoid misrepresenting the
  new body to clients
- ratelimit: cap buckets map at 100k entries with stale-eviction fallback
  and shared overflow bucket to prevent unbounded memory growth under
  high-cardinality traffic
- ratelimit: fix clientRateLimitKey comment — credentials are tried
  second (before IP), not as a "last resort"

Co-authored-by: Virgil <virgil@lethean.io>
This commit is contained in:
Snider 2026-04-07 09:29:03 +01:00
parent 372326297e
commit e27006de30
2 changed files with 77 additions and 27 deletions

View file

@ -50,27 +50,15 @@ func newCacheStore(maxEntries, maxBytes int) *cacheStore {
func (s *cacheStore) get(key string) *cacheEntry {
s.mu.Lock()
entry, ok := s.entries[key]
if ok {
if elem, exists := s.index[key]; exists {
s.order.MoveToFront(elem)
}
}
s.mu.Unlock()
if !ok {
s.mu.Unlock()
return nil
}
// Check expiry before promoting in the LRU order so we never move a stale
// entry to the front. All expiry checking and eviction happen inside the
// same critical section to avoid a TOCTOU race.
if time.Now().After(entry.expires) {
s.mu.Lock()
// Re-verify the entry pointer is unchanged before evicting. Another
// goroutine may have called set() between us releasing and re-acquiring
// the lock, replacing the entry with a fresh one. Evicting the new
// entry would corrupt s.currentBytes and lose valid cached data.
currentEntry, stillExists := s.entries[key]
if !stillExists || currentEntry != entry {
s.mu.Unlock()
return nil
}
if elem, exists := s.index[key]; exists {
s.order.Remove(elem)
delete(s.index, key)
@ -83,6 +71,12 @@ func (s *cacheStore) get(key string) *cacheEntry {
s.mu.Unlock()
return nil
}
// Only promote to LRU front after confirming the entry is still valid.
if elem, exists := s.index[key]; exists {
s.order.MoveToFront(elem)
}
s.mu.Unlock()
return entry
}
@ -201,15 +195,35 @@ func cacheMiddleware(store *cacheStore, ttl time.Duration) gin.HandlerFunc {
// Serve from cache if a valid entry exists.
if entry := store.get(key); entry != nil {
body := entry.body
metaRewritten := false
if meta := GetRequestMeta(c); meta != nil {
body = refreshCachedResponseMeta(entry.body, meta)
metaRewritten = true
}
// staleValidatorHeader returns true for headers that describe the
// exact bytes of the cached body and must be dropped when the body
// has been rewritten by refreshCachedResponseMeta.
staleValidatorHeader := func(canonical string) bool {
if !metaRewritten {
return false
}
switch canonical {
case "Etag", "Content-Md5", "Digest":
return true
}
return false
}
for k, vals := range entry.headers {
if http.CanonicalHeaderKey(k) == "X-Request-ID" {
canonical := http.CanonicalHeaderKey(k)
if canonical == "X-Request-Id" {
continue
}
if http.CanonicalHeaderKey(k) == "Content-Length" {
if canonical == "Content-Length" {
continue
}
if staleValidatorHeader(canonical) {
continue
}
for _, v := range vals {

View file

@ -18,6 +18,14 @@ import (
const (
rateLimitCleanupInterval = time.Minute
rateLimitStaleAfter = 10 * time.Minute
// rateLimitMaxBuckets caps the total number of tracked keys to prevent
// unbounded memory growth under high-cardinality traffic (e.g. scanning
// bots cycling random IPs). When the cap is reached, new keys that cannot
// evict a stale bucket are routed to a shared overflow bucket so requests
// are still rate-limited rather than bypassing the limiter entirely.
rateLimitMaxBuckets = 100_000
rateLimitOverflowKey = "__overflow__"
)
type rateLimitStore struct {
@ -57,12 +65,39 @@ func (s *rateLimitStore) allow(key string) rateLimitDecision {
s.mu.Lock()
bucket, ok := s.buckets[key]
if !ok || now.Sub(bucket.lastSeen) > rateLimitStaleAfter {
bucket = &rateLimitBucket{
tokens: float64(s.limit),
last: now,
lastSeen: now,
// Enforce the bucket cap before inserting a new entry. First try to
// evict a single stale entry; if none exists and the map is full,
// route the request to the shared overflow bucket so it is still
// rate-limited rather than bypassing the limiter.
if !ok && len(s.buckets) >= rateLimitMaxBuckets {
evicted := false
for k, candidate := range s.buckets {
if now.Sub(candidate.lastSeen) > rateLimitStaleAfter {
delete(s.buckets, k)
evicted = true
break
}
}
if !evicted {
// Cap reached and no stale entry to evict: use overflow bucket.
key = rateLimitOverflowKey
if ob, exists := s.buckets[key]; exists {
bucket = ob
ok = true
}
}
}
if !ok {
bucket = &rateLimitBucket{
tokens: float64(s.limit),
last: now,
lastSeen: now,
}
s.buckets[key] = bucket
} else {
bucket.lastSeen = now
}
s.buckets[key] = bucket
} else {
bucket.lastSeen = now
}
@ -186,9 +221,10 @@ func timeUntilFull(tokens float64, limit int) time.Duration {
}
// clientRateLimitKey derives a bucket key for the request. It prefers a
// validated principal from context (set by auth middleware), then falls back
// to the client IP. Raw credential headers are hashed with SHA-256 when used
// as a last resort so that secrets are never stored in the bucket map.
// validated principal placed in context by auth middleware, then falls back to
// raw credential headers (X-API-Key or Bearer token, hashed with SHA-256 so
// secrets are never stored in the bucket map), and finally falls back to the
// client IP when no credentials are present.
func clientRateLimitKey(c *gin.Context) string {
// Prefer a validated principal placed in context by auth middleware.
if principal, ok := c.Get("principal"); ok && principal != nil {