fix: guard response body lifecycle in SSE streaming client

Use sync.Once to ensure resp.Body is closed exactly once, preventing
TCP connection leaks when the iterator is never consumed and
double-close when iterated twice. Also adds Accept: text/event-stream
header to both SSE endpoints.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude 2026-02-19 21:04:02 +00:00
parent 1bc8c9948b
commit 5778f1f011
No known key found for this signature in database
GPG key ID: AF404715446AEB41

View file

@ -10,6 +10,7 @@ import (
"iter" "iter"
"net/http" "net/http"
"strings" "strings"
"sync"
) )
// ChatMessage is a single message in a conversation. // ChatMessage is a single message in a conversation.
@ -72,6 +73,7 @@ func (c *Client) ChatComplete(ctx context.Context, req ChatRequest) (iter.Seq[st
return noChunks, func() error { return fmt.Errorf("llamacpp: create chat request: %w", err) } return noChunks, func() error { return fmt.Errorf("llamacpp: create chat request: %w", err) }
} }
httpReq.Header.Set("Content-Type", "application/json") httpReq.Header.Set("Content-Type", "application/json")
httpReq.Header.Set("Accept", "text/event-stream")
resp, err := c.httpClient.Do(httpReq) resp, err := c.httpClient.Do(httpReq)
if err != nil { if err != nil {
@ -86,11 +88,15 @@ func (c *Client) ChatComplete(ctx context.Context, req ChatRequest) (iter.Seq[st
} }
} }
var streamErr error var (
streamErr error
closeOnce sync.Once
closeBody = func() { closeOnce.Do(func() { resp.Body.Close() }) }
)
sseData := parseSSE(resp.Body, &streamErr) sseData := parseSSE(resp.Body, &streamErr)
tokens := func(yield func(string) bool) { tokens := func(yield func(string) bool) {
defer resp.Body.Close() defer closeBody()
for raw := range sseData { for raw := range sseData {
var chunk chatChunkResponse var chunk chatChunkResponse
if err := json.Unmarshal([]byte(raw), &chunk); err != nil { if err := json.Unmarshal([]byte(raw), &chunk); err != nil {
@ -110,7 +116,10 @@ func (c *Client) ChatComplete(ctx context.Context, req ChatRequest) (iter.Seq[st
} }
} }
return tokens, func() error { return streamErr } return tokens, func() error {
closeBody()
return streamErr
}
} }
// Complete sends a streaming completion request to /v1/completions. // Complete sends a streaming completion request to /v1/completions.
@ -129,6 +138,7 @@ func (c *Client) Complete(ctx context.Context, req CompletionRequest) (iter.Seq[
return noChunks, func() error { return fmt.Errorf("llamacpp: create completion request: %w", err) } return noChunks, func() error { return fmt.Errorf("llamacpp: create completion request: %w", err) }
} }
httpReq.Header.Set("Content-Type", "application/json") httpReq.Header.Set("Content-Type", "application/json")
httpReq.Header.Set("Accept", "text/event-stream")
resp, err := c.httpClient.Do(httpReq) resp, err := c.httpClient.Do(httpReq)
if err != nil { if err != nil {
@ -143,11 +153,15 @@ func (c *Client) Complete(ctx context.Context, req CompletionRequest) (iter.Seq[
} }
} }
var streamErr error var (
streamErr error
closeOnce sync.Once
closeBody = func() { closeOnce.Do(func() { resp.Body.Close() }) }
)
sseData := parseSSE(resp.Body, &streamErr) sseData := parseSSE(resp.Body, &streamErr)
tokens := func(yield func(string) bool) { tokens := func(yield func(string) bool) {
defer resp.Body.Close() defer closeBody()
for raw := range sseData { for raw := range sseData {
var chunk completionChunkResponse var chunk completionChunkResponse
if err := json.Unmarshal([]byte(raw), &chunk); err != nil { if err := json.Unmarshal([]byte(raw), &chunk); err != nil {
@ -167,7 +181,10 @@ func (c *Client) Complete(ctx context.Context, req CompletionRequest) (iter.Seq[
} }
} }
return tokens, func() error { return streamErr } return tokens, func() error {
closeBody()
return streamErr
}
} }
// parseSSE reads SSE-formatted lines from r and yields the payload of each // parseSSE reads SSE-formatted lines from r and yields the payload of each