diff --git a/options.go b/options.go
index 48cb350..5546a11 100644
--- a/options.go
+++ b/options.go
@@ -62,9 +62,10 @@ func ApplyGenerateOpts(opts []GenerateOption) GenerateConfig {
 
 // LoadConfig holds model loading parameters.
 type LoadConfig struct {
-	Backend    string // "metal", "rocm", "llama_cpp" (empty = auto-detect)
-	ContextLen int    // Context window size (0 = model default)
-	GPULayers  int    // Number of layers to offload to GPU (-1 = all, 0 = none)
+	Backend       string // "metal", "rocm", "llama_cpp" (empty = auto-detect)
+	ContextLen    int    // Context window size (0 = model default)
+	GPULayers     int    // Number of layers to offload to GPU (-1 = all, 0 = none)
+	ParallelSlots int    // Number of concurrent inference slots; more slots use more VRAM (0 = server default)
 }
 
 // LoadOption configures model loading.
@@ -86,6 +87,13 @@ func WithGPULayers(n int) LoadOption {
 	return func(c *LoadConfig) { c.GPULayers = n }
 }
 
+// WithParallelSlots returns a LoadOption that stores n in LoadConfig.ParallelSlots,
+// the number of concurrent inference slots. Higher values allow parallel Generate/Chat
+// calls but increase VRAM usage; 0 or unset uses the server default (typically 1).
+func WithParallelSlots(n int) LoadOption {
+	return func(c *LoadConfig) { c.ParallelSlots = n } // n is stored as-is; no validation happens here
+}
+
 // ApplyLoadOpts builds a LoadConfig from options.
 func ApplyLoadOpts(opts []LoadOption) LoadConfig {
 	cfg := LoadConfig{