// SPDX-License-Identifier: EUPL-1.2

// Package ratelimit tracks per-model request and token usage against
// configurable provider quotas, persisting state to YAML or SQLite.
package ratelimit

import (
	"context"
	"io"
	"io/fs"
	"iter"
	"maps"
	"net/http"
	"net/url"
	"slices"
	"sync"
	"time"

	core "dappco.re/go/core"
	"gopkg.in/yaml.v3"
)

// Provider identifies an LLM provider for quota profiles.
//
//	provider := ProviderOpenAI
type Provider string

const (
	// ProviderGemini is Google's Gemini family (default).
	ProviderGemini Provider = "gemini"
	// ProviderOpenAI is OpenAI's GPT/o-series family.
	ProviderOpenAI Provider = "openai"
	// ProviderAnthropic is Anthropic's Claude family.
	ProviderAnthropic Provider = "anthropic"
	// ProviderLocal is for local inference (Ollama, MLX, llama.cpp).
	ProviderLocal Provider = "local"
)

const (
	// Default on-disk locations, resolved under the user's home directory.
	defaultStateDirName    = ".core"
	defaultYAMLStateFile   = "ratelimits.yaml"
	defaultSQLiteStateFile = "ratelimits.db"

	// Persistence backend identifiers accepted by Config.Backend.
	backendYAML   = "yaml"
	backendSQLite = "sqlite"

	// Caps on how much of a CountTokens HTTP response body is read.
	countTokensErrorBodyLimit   = 8 * 1024
	countTokensSuccessBodyLimit = 1 * 1024 * 1024
)

// ModelQuota defines the rate limits for a specific model.
// A zero value in any field means that dimension is unlimited.
//
//	quota := ModelQuota{MaxRPM: 60, MaxTPM: 90000, MaxRPD: 1000}
type ModelQuota struct {
	MaxRPM int `yaml:"max_rpm"` // Requests per minute (0 = unlimited)
	MaxTPM int `yaml:"max_tpm"` // Tokens per minute (0 = unlimited)
	MaxRPD int `yaml:"max_rpd"` // Requests per day (0 = unlimited)
}

// ProviderProfile bundles model quotas for a provider.
//
//	profile := ProviderProfile{Provider: ProviderGemini, Models: DefaultProfiles()[ProviderGemini].Models}
type ProviderProfile struct {
	// Provider identifies the provider that owns the profile.
	Provider Provider `yaml:"provider"`
	// Models maps model names to quotas.
	Models map[string]ModelQuota `yaml:"models"`
}

// Config controls RateLimiter initialisation.
//
//	cfg := Config{Providers: []Provider{ProviderGemini}, FilePath: "/tmp/ratelimits.yaml"}
type Config struct {
	// FilePath overrides the default state file location.
	// If empty, defaults to ~/.core/ratelimits.yaml.
	FilePath string `yaml:"file_path,omitempty"`
	// Backend selects the persistence backend: "yaml" (default) or "sqlite".
	// An empty string is treated as "yaml" for backward compatibility.
	Backend string `yaml:"backend,omitempty"`
	// Quotas sets per-model rate limits directly.
	// These are merged on top of any provider profile defaults.
	Quotas map[string]ModelQuota `yaml:"quotas,omitempty"`
	// Providers lists provider profiles to load.
	// If empty and Quotas is also empty, Gemini defaults are used.
	Providers []Provider `yaml:"providers,omitempty"`
}

// TokenEntry records a token usage event.
//
//	entry := TokenEntry{Time: time.Now(), Count: 512}
type TokenEntry struct {
	Time  time.Time `yaml:"time"`
	Count int       `yaml:"count"`
}

// UsageStats tracks usage history for a model.
//
//	stats := UsageStats{DayStart: time.Now(), DayCount: 1}
type UsageStats struct {
	Requests []time.Time  `yaml:"requests"` // Sliding window (1m)
	Tokens   []TokenEntry `yaml:"tokens"`   // Sliding window (1m)
	// DayStart is the start of the rolling 24-hour window.
	DayStart time.Time `yaml:"day_start"`
	// DayCount is the number of requests recorded in the rolling 24-hour window.
	DayCount int `yaml:"day_count"`
}

// RateLimiter manages rate limits across multiple models.
// All exported methods are safe for concurrent use; mu guards Quotas and State.
//
//	rl, err := New()
//	if err != nil { /* handle error */ }
//	defer rl.Close()
type RateLimiter struct {
	mu sync.RWMutex
	// Quotas holds the configured per-model limits.
	Quotas map[string]ModelQuota `yaml:"quotas"`
	// State holds per-model usage windows.
	State map[string]*UsageStats `yaml:"state"`

	filePath string
	sqlite   *sqliteStore // non-nil when backend is "sqlite"
}

// DefaultProfiles returns pre-configured quota profiles for each provider.
// Values are based on published rate limits as of Feb 2026.
// // profiles := DefaultProfiles() // openAI := profiles[ProviderOpenAI] func DefaultProfiles() map[Provider]ProviderProfile { return map[Provider]ProviderProfile{ ProviderGemini: { Provider: ProviderGemini, Models: map[string]ModelQuota{ "gemini-3-pro-preview": {MaxRPM: 150, MaxTPM: 1000000, MaxRPD: 1000}, "gemini-3-flash-preview": {MaxRPM: 150, MaxTPM: 1000000, MaxRPD: 1000}, "gemini-2.5-pro": {MaxRPM: 150, MaxTPM: 1000000, MaxRPD: 1000}, "gemini-2.0-flash": {MaxRPM: 150, MaxTPM: 1000000, MaxRPD: 0}, // Unlimited RPD "gemini-2.0-flash-lite": {MaxRPM: 0, MaxTPM: 0, MaxRPD: 0}, // Unlimited }, }, ProviderOpenAI: { Provider: ProviderOpenAI, Models: map[string]ModelQuota{ "gpt-4o": {MaxRPM: 500, MaxTPM: 30000, MaxRPD: 0}, "gpt-4o-mini": {MaxRPM: 500, MaxTPM: 200000, MaxRPD: 0}, "gpt-4-turbo": {MaxRPM: 500, MaxTPM: 30000, MaxRPD: 0}, "o1": {MaxRPM: 500, MaxTPM: 30000, MaxRPD: 0}, "o1-mini": {MaxRPM: 500, MaxTPM: 200000, MaxRPD: 0}, "o3-mini": {MaxRPM: 500, MaxTPM: 200000, MaxRPD: 0}, }, }, ProviderAnthropic: { Provider: ProviderAnthropic, Models: map[string]ModelQuota{ "claude-opus-4": {MaxRPM: 50, MaxTPM: 40000, MaxRPD: 0}, "claude-sonnet-4": {MaxRPM: 50, MaxTPM: 40000, MaxRPD: 0}, "claude-haiku-3.5": {MaxRPM: 50, MaxTPM: 50000, MaxRPD: 0}, }, }, ProviderLocal: { Provider: ProviderLocal, Models: map[string]ModelQuota{ // Local inference has no external rate limits by default. // Users can override per-model if their hardware requires throttling. }, }, } } // New creates a new RateLimiter with Gemini defaults. // This preserves backward compatibility -- existing callers are unaffected. // // rl, err := New() func New() (*RateLimiter, error) { return NewWithConfig(Config{ Providers: []Provider{ProviderGemini}, }) } // NewWithConfig creates a RateLimiter from explicit configuration. // If no providers or quotas are specified, Gemini defaults are used. 
// // rl, err := NewWithConfig(Config{Providers: []Provider{ProviderAnthropic}}) func NewWithConfig(cfg Config) (*RateLimiter, error) { backend, err := normaliseBackend(cfg.Backend) if err != nil { return nil, err } filePath := cfg.FilePath if filePath == "" { filePath, err = defaultStatePath(backend) if err != nil { return nil, err } } if backend == backendSQLite { if cfg.FilePath == "" { if err := ensureDir(core.PathDir(filePath)); err != nil { return nil, core.E("ratelimit.NewWithConfig", "mkdir", err) } } return NewWithSQLiteConfig(filePath, cfg) } rl := newConfiguredRateLimiter(cfg) rl.filePath = filePath return rl, nil } // SetQuota sets or updates the quota for a specific model at runtime. // // rl.SetQuota("gpt-4o-mini", ModelQuota{MaxRPM: 60, MaxTPM: 200000}) func (rl *RateLimiter) SetQuota(model string, quota ModelQuota) { rl.mu.Lock() defer rl.mu.Unlock() rl.Quotas[model] = quota } // AddProvider loads all default quotas for a provider. // Existing quotas for models in the profile are overwritten. // // rl.AddProvider(ProviderOpenAI) func (rl *RateLimiter) AddProvider(provider Provider) { rl.mu.Lock() defer rl.mu.Unlock() profiles := DefaultProfiles() if profile, ok := profiles[provider]; ok { maps.Copy(rl.Quotas, profile.Models) } } // Load reads the state from disk (YAML) or database (SQLite). // // if err := rl.Load(); err != nil { /* handle error */ } func (rl *RateLimiter) Load() error { rl.mu.Lock() defer rl.mu.Unlock() if rl.sqlite != nil { return rl.loadSQLite() } content, err := readLocalFile(rl.filePath) if core.Is(err, fs.ErrNotExist) { return nil } if err != nil { return err } if err := yaml.Unmarshal([]byte(content), rl); err != nil { return err } ensureMaps(rl) return nil } // loadSQLite reads quotas and state from the SQLite backend. // Caller must hold the lock. 
// loadSQLite replaces in-memory quotas/state with the SQLite snapshot.
// Caller must hold rl.mu (write lock).
func (rl *RateLimiter) loadSQLite() error {
	quotas, err := rl.sqlite.loadQuotas()
	if err != nil {
		return err
	}
	// Persisted quotas are authoritative when present; otherwise keep config defaults.
	if len(quotas) > 0 {
		rl.Quotas = maps.Clone(quotas)
	}
	state, err := rl.sqlite.loadState()
	if err != nil {
		return err
	}
	// Replace in-memory state with the persisted snapshot.
	rl.State = state
	return nil
}

// Persist writes a snapshot of the state to disk (YAML) or database (SQLite).
// It clones the state under a lock and performs I/O without blocking other callers.
//
//	if err := rl.Persist(); err != nil { /* handle error */ }
func (rl *RateLimiter) Persist() error {
	rl.mu.Lock()
	// Deep-copy everything we intend to write so the lock can be released
	// before any marshalling or I/O happens.
	quotas := maps.Clone(rl.Quotas)
	state := make(map[string]*UsageStats, len(rl.State))
	for k, v := range rl.State {
		if v == nil {
			continue
		}
		state[k] = &UsageStats{
			Requests: slices.Clone(v.Requests),
			Tokens:   slices.Clone(v.Tokens),
			DayStart: v.DayStart,
			DayCount: v.DayCount,
		}
	}
	// Capture backend handles under the lock as well; they are set at
	// construction time but this keeps the read consistent.
	sqlite := rl.sqlite
	filePath := rl.filePath
	rl.mu.Unlock()

	if sqlite != nil {
		if err := sqlite.saveSnapshot(quotas, state); err != nil {
			return core.E("ratelimit.Persist", "sqlite snapshot", err)
		}
		return nil
	}
	// For YAML, we marshal the entire RateLimiter, but since we want to avoid
	// holding the lock during marshal, we marshal a temporary struct.
	data, err := yaml.Marshal(struct {
		Quotas map[string]ModelQuota  `yaml:"quotas"`
		State  map[string]*UsageStats `yaml:"state"`
	}{
		Quotas: quotas,
		State:  state,
	})
	if err != nil {
		return core.E("ratelimit.Persist", "marshal", err)
	}
	if err := writeLocalFile(filePath, string(data)); err != nil {
		return core.E("ratelimit.Persist", "write", err)
	}
	return nil
}

// prune removes entries older than the sliding window (1 minute) and removes
// empty state for models that haven't been used recently.
// Caller must hold lock.
func (rl *RateLimiter) prune(model string) { stats, ok := rl.State[model] if !ok { return } if stats == nil { delete(rl.State, model) return } now := time.Now() window := now.Add(-1 * time.Minute) // Prune requests stats.Requests = slices.DeleteFunc(stats.Requests, func(t time.Time) bool { return t.Before(window) }) // Prune tokens stats.Tokens = slices.DeleteFunc(stats.Tokens, func(t TokenEntry) bool { return t.Time.Before(window) }) // Reset daily counter if day has passed if now.Sub(stats.DayStart) >= 24*time.Hour { stats.DayStart = now stats.DayCount = 0 } // If everything is empty and it's been more than a minute since last activity, // delete the model state entirely to prevent memory leaks. if len(stats.Requests) == 0 && len(stats.Tokens) == 0 { // We could use a more sophisticated TTL here, but for now just cleanup empty ones. // Note: we don't delete if DayCount > 0 and it's still the same day. if stats.DayCount == 0 { delete(rl.State, model) } } } // BackgroundPrune starts a goroutine that periodically prunes all model states. // It returns a function to stop the pruner. // // stop := rl.BackgroundPrune(30 * time.Second) // defer stop() func (rl *RateLimiter) BackgroundPrune(interval time.Duration) func() { if interval <= 0 { return func() {} } ctx, cancel := context.WithCancel(context.Background()) go func() { ticker := time.NewTicker(interval) defer ticker.Stop() for { select { case <-ctx.Done(): return case <-ticker.C: rl.mu.Lock() for m := range rl.State { rl.prune(m) } rl.mu.Unlock() } } }() return cancel } // CanSend checks if a request can be sent without violating limits. // // ok := rl.CanSend("gemini-3-pro-preview", 1200) func (rl *RateLimiter) CanSend(model string, estimatedTokens int) bool { return rl.Decide(model, estimatedTokens).Allowed } // RecordUsage records a successful API call. 
// // rl.RecordUsage("gemini-3-pro-preview", 900, 300) func (rl *RateLimiter) RecordUsage(model string, promptTokens, outputTokens int) { rl.mu.Lock() defer rl.mu.Unlock() rl.prune(model) // Prune before recording to ensure we're not exceeding limits immediately after stats, ok := rl.State[model] if !ok { stats = &UsageStats{DayStart: time.Now()} rl.State[model] = stats } now := time.Now() totalTokens := safeTokenSum(promptTokens, outputTokens) stats.Requests = append(stats.Requests, now) stats.Tokens = append(stats.Tokens, TokenEntry{Time: now, Count: totalTokens}) stats.DayCount++ } // WaitForCapacity blocks until capacity is available or context is cancelled. // // err := rl.WaitForCapacity(ctx, "gemini-3-pro-preview", 1200) func (rl *RateLimiter) WaitForCapacity(ctx context.Context, model string, tokens int) error { if tokens < 0 { return core.E("ratelimit.WaitForCapacity", "negative tokens", nil) } for { decision := rl.Decide(model, tokens) if decision.Allowed { return nil } sleep := decision.RetryAfter if sleep <= 0 { sleep = time.Second } timer := time.NewTimer(sleep) select { case <-ctx.Done(): timer.Stop() return ctx.Err() case <-timer.C: timer.Stop() } } } // Reset clears stats for a model (or all if model is empty). // // rl.Reset("gemini-3-pro-preview") func (rl *RateLimiter) Reset(model string) { rl.mu.Lock() defer rl.mu.Unlock() if model == "" { rl.State = make(map[string]*UsageStats) } else { delete(rl.State, model) } } // ModelStats represents a snapshot of usage. // // stats := rl.Stats("gemini-3-pro-preview") type ModelStats struct { // RPM is the current requests-per-minute usage in the sliding window. RPM int // MaxRPM is the configured requests-per-minute limit. MaxRPM int // TPM is the current tokens-per-minute usage in the sliding window. TPM int // MaxTPM is the configured tokens-per-minute limit. MaxTPM int // RPD is the current requests-per-day usage in the rolling 24-hour window. RPD int // MaxRPD is the configured requests-per-day limit. 
MaxRPD int // DayStart is the start of the current rolling 24-hour window. DayStart time.Time } // DecisionCode identifies the reason for an allow or deny outcome from Decide. type DecisionCode string const ( // DecisionAllowed means the request fits within all configured limits. DecisionAllowed DecisionCode = "ok" // DecisionUnknownModel means the model has no configured quotas and is therefore allowed. DecisionUnknownModel DecisionCode = "unknown_model" // DecisionUnlimited means the model is configured with no limits. DecisionUnlimited DecisionCode = "unlimited" // DecisionInvalidTokens means a negative token estimate was provided. DecisionInvalidTokens DecisionCode = "invalid_tokens" // DecisionRPDLimit means the rolling 24-hour request limit has been reached. DecisionRPDLimit DecisionCode = "rpd_exceeded" // DecisionRPMLimit means the per-minute request limit has been reached. DecisionRPMLimit DecisionCode = "rpm_exceeded" // DecisionTPMLimit means the per-minute token limit would be exceeded. DecisionTPMLimit DecisionCode = "tpm_exceeded" ) // Decision captures an allow/deny decision with context for agents. // RetryAfter is zero when the request is allowed or when no meaningful wait time exists. type Decision struct { Allowed bool Code DecisionCode Reason string RetryAfter time.Duration Stats ModelStats } // Models returns a sorted iterator over all model names tracked by the limiter. // // for model := range rl.Models() { println(model) } func (rl *RateLimiter) Models() iter.Seq[string] { rl.mu.RLock() defer rl.mu.RUnlock() // Use maps.Keys and slices.Sorted for idiomatic Go 1.26+ keys := slices.Collect(maps.Keys(rl.Quotas)) for m := range rl.State { if _, ok := rl.Quotas[m]; !ok { keys = append(keys, m) } } slices.Sort(keys) return slices.Values(keys) } // Iter returns a sorted iterator over all model names and their current stats. 
// // for model, stats := range rl.Iter() { _ = stats; println(model) } func (rl *RateLimiter) Iter() iter.Seq2[string, ModelStats] { return func(yield func(string, ModelStats) bool) { stats := rl.AllStats() for _, m := range slices.Sorted(maps.Keys(stats)) { if !yield(m, stats[m]) { return } } } } // Stats returns current stats for a model. // // stats := rl.Stats("gemini-3-pro-preview") func (rl *RateLimiter) Stats(model string) ModelStats { rl.mu.Lock() defer rl.mu.Unlock() rl.prune(model) return rl.snapshotLocked(model) } // AllStats returns stats for all tracked models. // // all := rl.AllStats() func (rl *RateLimiter) AllStats() map[string]ModelStats { rl.mu.Lock() defer rl.mu.Unlock() result := make(map[string]ModelStats) // Collect all model names for m := range rl.Quotas { result[m] = ModelStats{} } for m := range rl.State { result[m] = ModelStats{} } for m := range result { rl.prune(m) result[m] = rl.snapshotLocked(m) } return result } // Decide returns structured allow/deny information for an estimated request. // It never records usage; call RecordUsage after a successful decision. 
func (rl *RateLimiter) Decide(model string, estimatedTokens int) Decision {
	rl.mu.Lock()
	defer rl.mu.Unlock()
	now := time.Now()
	decision := Decision{}
	// Invalid input: reject before touching any state.
	if estimatedTokens < 0 {
		decision.Allowed = false
		decision.Code = DecisionInvalidTokens
		decision.Reason = "estimated tokens must be non-negative"
		decision.Stats = rl.snapshotLocked(model)
		return decision
	}
	// Unconfigured models are allowed through (permissive by default).
	quota, ok := rl.Quotas[model]
	if !ok {
		decision.Allowed = true
		decision.Code = DecisionUnknownModel
		decision.Reason = "model has no configured quota"
		decision.Stats = rl.snapshotLocked(model)
		return decision
	}
	// All-zero quota means explicitly unlimited.
	if quota.MaxRPM == 0 && quota.MaxTPM == 0 && quota.MaxRPD == 0 {
		decision.Allowed = true
		decision.Code = DecisionUnlimited
		decision.Reason = "all limits are unlimited"
		decision.Stats = rl.snapshotLocked(model)
		return decision
	}
	// Prune expired entries so the checks below only see live usage, then
	// ensure state exists for the snapshot.
	rl.prune(model)
	stats, ok := rl.State[model]
	if !ok || stats == nil {
		stats = &UsageStats{DayStart: now}
		rl.State[model] = stats
	}
	decision.Stats = rl.snapshotLocked(model)
	// Checks run in order of severity: daily cap, then RPM, then TPM.
	if quota.MaxRPD > 0 && stats.DayCount >= quota.MaxRPD {
		decision.Code = DecisionRPDLimit
		decision.Reason = "daily request limit reached"
		decision.RetryAfter = nonNegativeDuration(stats.DayStart.Add(24 * time.Hour).Sub(now))
		return decision
	}
	if quota.MaxRPM > 0 && len(stats.Requests) >= quota.MaxRPM {
		decision.Code = DecisionRPMLimit
		decision.Reason = "per-minute request limit reached"
		// Requests are appended in time order, so index 0 is the oldest entry
		// and determines when a slot frees up.
		if len(stats.Requests) > 0 {
			decision.RetryAfter = nonNegativeDuration(stats.Requests[0].Add(time.Minute).Sub(now))
		}
		return decision
	}
	if quota.MaxTPM > 0 {
		currentTokens := totalTokenCount(stats.Tokens)
		// Written as currentTokens > MaxTPM-estimatedTokens (not current+est > max)
		// to avoid integer overflow on the addition.
		if estimatedTokens > quota.MaxTPM || currentTokens > quota.MaxTPM-estimatedTokens {
			decision.Code = DecisionTPMLimit
			decision.Reason = "per-minute token limit reached"
			decision.RetryAfter = retryAfterForTokens(now, stats.Tokens, quota.MaxTPM, estimatedTokens)
			return decision
		}
	}
	decision.Allowed = true
	decision.Code = DecisionAllowed
	decision.Reason = "within quota"
	return decision
}

// snapshotLocked builds ModelStats for the provided model.
// Caller must hold rl.mu.
func (rl *RateLimiter) snapshotLocked(model string) ModelStats {
	stats := ModelStats{}
	if q, ok := rl.Quotas[model]; ok {
		stats.MaxRPM = q.MaxRPM
		stats.MaxTPM = q.MaxTPM
		stats.MaxRPD = q.MaxRPD
	}
	if s, ok := rl.State[model]; ok && s != nil {
		stats.RPM = len(s.Requests)
		stats.RPD = s.DayCount
		stats.DayStart = s.DayStart
		stats.TPM = totalTokenCount(s.Tokens)
	}
	return stats
}

// NewWithSQLite creates a SQLite-backed RateLimiter with Gemini defaults.
// The database is created at dbPath if it does not exist. Use Close() to
// release the database connection when finished.
//
//	rl, err := NewWithSQLite("/tmp/ratelimits.db")
func NewWithSQLite(dbPath string) (*RateLimiter, error) {
	return NewWithSQLiteConfig(dbPath, Config{
		Providers: []Provider{ProviderGemini},
	})
}

// NewWithSQLiteConfig creates a SQLite-backed RateLimiter with custom config.
// The Backend field in cfg is ignored (always "sqlite"). Use Close() to
// release the database connection when finished.
//
//	rl, err := NewWithSQLiteConfig("/tmp/ratelimits.db", Config{Providers: []Provider{ProviderOpenAI}})
func NewWithSQLiteConfig(dbPath string, cfg Config) (*RateLimiter, error) {
	store, err := newSQLiteStore(dbPath)
	if err != nil {
		return nil, err
	}
	rl := newConfiguredRateLimiter(cfg)
	rl.sqlite = store
	return rl, nil
}

// Close releases resources held by the RateLimiter. For YAML-backed
// limiters this is a no-op. For SQLite-backed limiters it closes the
// database connection.
//
//	defer rl.Close()
func (rl *RateLimiter) Close() error {
	if rl.sqlite != nil {
		return rl.sqlite.close()
	}
	return nil
}

// MigrateYAMLToSQLite reads state from a YAML file and writes it to a new
// SQLite database. Both quotas and usage state are migrated. The SQLite
// database is created if it does not exist.
//
//	err := MigrateYAMLToSQLite("ratelimits.yaml", "ratelimits.db")
func MigrateYAMLToSQLite(yamlPath, sqlitePath string) error {
	// Load from YAML.
	content, err := readLocalFile(yamlPath)
	if err != nil {
		return core.E("ratelimit.MigrateYAMLToSQLite", "read", err)
	}
	var rl RateLimiter
	if err := yaml.Unmarshal([]byte(content), &rl); err != nil {
		return core.E("ratelimit.MigrateYAMLToSQLite", "unmarshal", err)
	}
	// Write to SQLite.
	store, err := newSQLiteStore(sqlitePath)
	if err != nil {
		return err
	}
	defer store.close()
	if err := store.saveSnapshot(rl.Quotas, rl.State); err != nil {
		return err
	}
	return nil
}

// CountTokens calls the Google API to count tokens for a prompt.
//
//	tokens, err := CountTokens(ctx, apiKey, "gemini-3-pro-preview", prompt)
func CountTokens(ctx context.Context, apiKey, model, text string) (int, error) {
	return countTokensWithClient(ctx, http.DefaultClient, "https://generativelanguage.googleapis.com", apiKey, model, text)
}

// countTokensWithClient performs the countTokens HTTP request against baseURL.
// A nil client falls back to http.DefaultClient. Response bodies are read
// through readLimitedBody so oversized responses cannot exhaust memory.
func countTokensWithClient(ctx context.Context, client *http.Client, baseURL, apiKey, model, text string) (int, error) {
	requestURL, err := countTokensURL(baseURL, model)
	if err != nil {
		return 0, core.E("ratelimit.CountTokens", "build url", err)
	}
	reqBody := map[string]any{
		"contents": []any{
			map[string]any{
				"parts": []any{
					map[string]string{"text": text},
				},
			},
		},
	}
	jsonBody := core.JSONMarshal(reqBody)
	if !jsonBody.OK {
		return 0, core.E("ratelimit.CountTokens", "marshal request", resultError(jsonBody))
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, requestURL, core.NewReader(string(jsonBody.Value.([]byte))))
	if err != nil {
		return 0, core.E("ratelimit.CountTokens", "new request", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("x-goog-api-key", apiKey)
	if client == nil {
		client = http.DefaultClient
	}
	resp, err := client.Do(req)
	if err != nil {
		return 0, core.E("ratelimit.CountTokens", "do request", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, err := readLimitedBody(resp.Body, countTokensErrorBodyLimit)
		if err != nil {
			return 0, core.E("ratelimit.CountTokens", "read error body", err)
		}
		return 0, core.E("ratelimit.CountTokens", core.Sprintf("api error status %d: %s", resp.StatusCode, body), nil)
	}
	body, err := readLimitedBody(resp.Body, countTokensSuccessBodyLimit)
	if err != nil {
		return 0, core.E("ratelimit.CountTokens", "decode response", err)
	}
	var result struct {
		TotalTokens int `json:"totalTokens"`
	}
	decode := core.JSONUnmarshalString(body, &result)
	if !decode.OK {
		return 0, core.E("ratelimit.CountTokens", "decode response", resultError(decode))
	}
	return result.TotalTokens, nil
}

// newConfiguredRateLimiter builds a RateLimiter with initialised maps and
// the quotas implied by cfg applied.
func newConfiguredRateLimiter(cfg Config) *RateLimiter {
	rl := &RateLimiter{
		Quotas: make(map[string]ModelQuota),
		State:  make(map[string]*UsageStats),
	}
	applyConfig(rl, cfg)
	return rl
}

// ensureMaps re-initialises any map fields left nil by YAML unmarshalling.
func ensureMaps(rl *RateLimiter) {
	if rl.Quotas == nil {
		rl.Quotas = make(map[string]ModelQuota)
	}
	if rl.State == nil {
		rl.State = make(map[string]*UsageStats)
	}
}

// applyConfig merges provider profile quotas and then explicit cfg.Quotas
// (which win on conflict) into rl.Quotas.
func applyConfig(rl *RateLimiter, cfg Config) {
	profiles := DefaultProfiles()
	providers := cfg.Providers
	// No providers and no explicit quotas: fall back to Gemini defaults.
	if len(providers) == 0 && len(cfg.Quotas) == 0 {
		providers = []Provider{ProviderGemini}
	}
	for _, p := range providers {
		if profile, ok := profiles[p]; ok {
			maps.Copy(rl.Quotas, profile.Models)
		}
	}
	maps.Copy(rl.Quotas, cfg.Quotas)
}

// normaliseBackend maps a user-supplied backend string (case/space
// insensitive) to a canonical backend identifier, rejecting unknown values.
func normaliseBackend(backend string) (string, error) {
	switch core.Lower(core.Trim(backend)) {
	case "", backendYAML:
		return backendYAML, nil
	case backendSQLite:
		return backendSQLite, nil
	default:
		return "", core.E("ratelimit.NewWithConfig", core.Sprintf("unknown backend %q", backend), nil)
	}
}

// defaultStatePath returns the default state file path for the backend
// (~/.core/ratelimits.yaml or ~/.core/ratelimits.db).
func defaultStatePath(backend string) (string, error) {
	home := currentHomeDir()
	if home == "" {
		return "", core.E("ratelimit.defaultStatePath", "home dir unavailable", nil)
	}
	fileName := defaultYAMLStateFile
	if backend == backendSQLite {
		fileName = defaultSQLiteStateFile
	}
	return core.Path(home, defaultStateDirName, fileName), nil
}

// currentHomeDir returns the first non-empty home directory candidate from
// the environment (CORE_HOME override first, then platform variants).
func currentHomeDir() string {
	for _, key := range []string{"CORE_HOME", "HOME", "home", "USERPROFILE"} {
		if value := core.Trim(core.Env(key)); value != "" {
			return value
		}
	}
	return ""
}

// safeTokenSum adds two token counts with the same saturation/negative-input
// rules as safeTokenTotal.
func safeTokenSum(a, b int) int {
	return safeTokenTotal([]TokenEntry{{Count: a}, {Count: b}})
}

// totalTokenCount sums the token counts of a window's entries.
func totalTokenCount(tokens []TokenEntry) int {
	return safeTokenTotal(tokens)
}

// safeTokenTotal sums entry counts, skipping negative counts and saturating
// at the maximum int instead of overflowing.
func safeTokenTotal(tokens []TokenEntry) int {
	const maxInt = int(^uint(0) >> 1)
	total := 0
	for _, token := range tokens {
		count := token.Count
		if count < 0 {
			continue
		}
		if total > maxInt-count {
			return maxInt
		}
		total += count
	}
	return total
}

// retryAfterForTokens estimates how long until enough token entries expire
// from the sliding window for an estimatedTokens-sized request to fit.
// Returns 0 when no wait is needed or when no finite wait would help
// (e.g. the estimate alone exceeds maxTPM). Assumes tokens is ordered
// oldest-first, which holds because entries are appended chronologically.
func retryAfterForTokens(now time.Time, tokens []TokenEntry, maxTPM, estimatedTokens int) time.Duration {
	if maxTPM <= 0 {
		return 0
	}
	deficit := totalTokenCount(tokens) + estimatedTokens - maxTPM
	if deficit <= 0 {
		return 0
	}
	remaining := deficit
	for _, entry := range tokens {
		if entry.Count < 0 {
			continue
		}
		remaining -= entry.Count
		if remaining <= 0 {
			// This entry's expiry frees enough capacity.
			return nonNegativeDuration(entry.Time.Add(time.Minute).Sub(now))
		}
	}
	return 0
}

// nonNegativeDuration clamps a duration at zero.
func nonNegativeDuration(value time.Duration) time.Duration {
	if value < 0 {
		return 0
	}
	return value
}

// countTokensURL builds the :countTokens endpoint URL for a model,
// validating the base URL and path-escaping the model name.
func countTokensURL(baseURL, model string) (string, error) {
	if core.Trim(model) == "" {
		return "", core.E("ratelimit.countTokensURL", "empty model", nil)
	}
	parsed, err := url.Parse(baseURL)
	if err != nil {
		return "", err
	}
	if parsed.Scheme == "" || parsed.Host == "" {
		return "", core.E("ratelimit.countTokensURL", "invalid base url", nil)
	}
	return core.Concat(core.TrimSuffix(parsed.String(), "/"), "/v1beta/models/", url.PathEscape(model), ":countTokens"), nil
}

// readLimitedBody reads at most limit bytes from r, appending "..." when the
// body was truncated (it reads limit+1 bytes to detect overflow).
func readLimitedBody(r io.Reader, limit int64) (string, error) {
	body, err := io.ReadAll(io.LimitReader(r, limit+1))
	if err != nil {
		return "", err
	}
	truncated := int64(len(body)) > limit
	if truncated {
		body = body[:limit]
	}
	result := string(body)
	if truncated {
		result += "..."
	}
	return result, nil
}

// readLocalFile reads path via core.Fs and returns its contents as a string.
// NOTE: the local `fs` variable shadows the imported io/fs package inside
// this function (legal; io/fs is not referenced here).
func readLocalFile(path string) (string, error) {
	var fs core.Fs
	result := fs.Read(path)
	if !result.OK {
		return "", resultError(result)
	}
	content, ok := result.Value.(string)
	if !ok {
		return "", core.E("ratelimit.readLocalFile", "read returned non-string", nil)
	}
	return content, nil
}

// writeLocalFile writes content to path via core.Fs.
func writeLocalFile(path, content string) error {
	var fs core.Fs
	return resultError(fs.Write(path, content))
}

// ensureDir creates the directory at path (and parents) via core.Fs.
func ensureDir(path string) error {
	var fs core.Fs
	return resultError(fs.EnsureDir(path))
}

// resultError converts a core.Result into an error: nil on success,
// the embedded error when present, nil for an empty failure value, and a
// wrapped stringification otherwise.
func resultError(result core.Result) error {
	if result.OK {
		return nil
	}
	if err, ok := result.Value.(error); ok {
		return err
	}
	if result.Value == nil {
		return nil
	}
	return core.E("ratelimit.resultError", core.Sprint(result.Value), nil)
}