feat(parser): Phase 1+2 — parse stats, truncation detection, session analytics
Phase 1: ParseTranscript now returns (*Session, *ParseStats, error). ParseStats tracks TotalLines, SkippedLines, OrphanedToolCalls, and Warnings (line numbers + previews for bad JSON, orphaned tool IDs, truncated final line detection). All call sites updated. Phase 2: New analytics.go with Analyse() and FormatAnalytics(). SessionAnalytics computes Duration, ActiveTime, ToolCounts, ErrorCounts, SuccessRate, AvgLatency, MaxLatency, and token estimation. Co-Authored-By: Virgil <virgil@lethean.io> Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
c798d9d5a6
commit
a6fb93408b
6 changed files with 669 additions and 23 deletions
137
analytics.go
Normal file
137
analytics.go
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
// SPDX-License-Identifier: EUPL-1.2
|
||||
package session
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SessionAnalytics holds computed metrics for a parsed session.
//
// All maps are keyed by tool name (e.g. "Bash", "Read"). Instances are
// populated by Analyse; the zero value is not useful because the maps
// would be nil.
type SessionAnalytics struct {
	Duration   time.Duration // wall-clock span: EndTime - StartTime
	ActiveTime time.Duration // sum of tool-call durations
	EventCount int           // total number of events in the session

	ToolCounts  map[string]int // tool name -> number of invocations
	ErrorCounts map[string]int // tool name -> number of failed invocations

	SuccessRate float64 // successful tool calls / total tool calls; 0 when there are no calls

	AvgLatency map[string]time.Duration // tool name -> mean call duration
	MaxLatency map[string]time.Duration // tool name -> longest single call duration

	// Rough token estimates derived as character count / 4.
	EstimatedInputTokens  int
	EstimatedOutputTokens int
}
|
||||
|
||||
// Analyse iterates session events and computes analytics. Pure function, no I/O.
|
||||
func Analyse(sess *Session) *SessionAnalytics {
|
||||
a := &SessionAnalytics{
|
||||
ToolCounts: make(map[string]int),
|
||||
ErrorCounts: make(map[string]int),
|
||||
AvgLatency: make(map[string]time.Duration),
|
||||
MaxLatency: make(map[string]time.Duration),
|
||||
}
|
||||
|
||||
if sess == nil {
|
||||
return a
|
||||
}
|
||||
|
||||
a.Duration = sess.EndTime.Sub(sess.StartTime)
|
||||
a.EventCount = len(sess.Events)
|
||||
|
||||
// Track totals for latency averaging
|
||||
type latencyAccum struct {
|
||||
total time.Duration
|
||||
count int
|
||||
}
|
||||
latencies := make(map[string]*latencyAccum)
|
||||
|
||||
var totalToolCalls int
|
||||
var totalErrors int
|
||||
|
||||
for _, evt := range sess.Events {
|
||||
// Token estimation: ~4 chars per token
|
||||
a.EstimatedInputTokens += len(evt.Input) / 4
|
||||
a.EstimatedOutputTokens += len(evt.Output) / 4
|
||||
|
||||
if evt.Type != "tool_use" {
|
||||
continue
|
||||
}
|
||||
|
||||
totalToolCalls++
|
||||
a.ToolCounts[evt.Tool]++
|
||||
|
||||
if !evt.Success {
|
||||
totalErrors++
|
||||
a.ErrorCounts[evt.Tool]++
|
||||
}
|
||||
|
||||
// Active time: sum of tool call durations
|
||||
a.ActiveTime += evt.Duration
|
||||
|
||||
// Latency tracking
|
||||
if _, ok := latencies[evt.Tool]; !ok {
|
||||
latencies[evt.Tool] = &latencyAccum{}
|
||||
}
|
||||
latencies[evt.Tool].total += evt.Duration
|
||||
latencies[evt.Tool].count++
|
||||
|
||||
if evt.Duration > a.MaxLatency[evt.Tool] {
|
||||
a.MaxLatency[evt.Tool] = evt.Duration
|
||||
}
|
||||
}
|
||||
|
||||
// Compute averages
|
||||
for tool, acc := range latencies {
|
||||
if acc.count > 0 {
|
||||
a.AvgLatency[tool] = acc.total / time.Duration(acc.count)
|
||||
}
|
||||
}
|
||||
|
||||
// Success rate
|
||||
if totalToolCalls > 0 {
|
||||
a.SuccessRate = float64(totalToolCalls-totalErrors) / float64(totalToolCalls)
|
||||
}
|
||||
|
||||
return a
|
||||
}
|
||||
|
||||
// FormatAnalytics returns a tabular text summary suitable for CLI display.
|
||||
func FormatAnalytics(a *SessionAnalytics) string {
|
||||
var b strings.Builder
|
||||
|
||||
b.WriteString("Session Analytics\n")
|
||||
b.WriteString(strings.Repeat("=", 50) + "\n\n")
|
||||
|
||||
b.WriteString(fmt.Sprintf(" Duration: %s\n", formatDuration(a.Duration)))
|
||||
b.WriteString(fmt.Sprintf(" Active Time: %s\n", formatDuration(a.ActiveTime)))
|
||||
b.WriteString(fmt.Sprintf(" Events: %d\n", a.EventCount))
|
||||
b.WriteString(fmt.Sprintf(" Success Rate: %.1f%%\n", a.SuccessRate*100))
|
||||
b.WriteString(fmt.Sprintf(" Est. Input Tk: %d\n", a.EstimatedInputTokens))
|
||||
b.WriteString(fmt.Sprintf(" Est. Output Tk: %d\n", a.EstimatedOutputTokens))
|
||||
|
||||
if len(a.ToolCounts) > 0 {
|
||||
b.WriteString("\n Tool Breakdown\n")
|
||||
b.WriteString(" " + strings.Repeat("-", 48) + "\n")
|
||||
b.WriteString(fmt.Sprintf(" %-14s %6s %6s %10s %10s\n",
|
||||
"Tool", "Calls", "Errors", "Avg", "Max"))
|
||||
b.WriteString(" " + strings.Repeat("-", 48) + "\n")
|
||||
|
||||
// Sort tools for deterministic output
|
||||
tools := make([]string, 0, len(a.ToolCounts))
|
||||
for t := range a.ToolCounts {
|
||||
tools = append(tools, t)
|
||||
}
|
||||
sort.Strings(tools)
|
||||
|
||||
for _, tool := range tools {
|
||||
errors := a.ErrorCounts[tool]
|
||||
avg := a.AvgLatency[tool]
|
||||
max := a.MaxLatency[tool]
|
||||
b.WriteString(fmt.Sprintf(" %-14s %6d %6d %10s %10s\n",
|
||||
tool, a.ToolCounts[tool], errors,
|
||||
formatDuration(avg), formatDuration(max)))
|
||||
}
|
||||
}
|
||||
|
||||
return b.String()
|
||||
}
|
||||
286
analytics_test.go
Normal file
286
analytics_test.go
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
// SPDX-License-Identifier: EUPL-1.2
|
||||
package session
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestAnalyse_EmptySession_Good(t *testing.T) {
|
||||
sess := &Session{
|
||||
ID: "empty",
|
||||
StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC),
|
||||
EndTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC),
|
||||
Events: nil,
|
||||
}
|
||||
|
||||
a := Analyse(sess)
|
||||
require.NotNil(t, a)
|
||||
|
||||
assert.Equal(t, time.Duration(0), a.Duration)
|
||||
assert.Equal(t, time.Duration(0), a.ActiveTime)
|
||||
assert.Equal(t, 0, a.EventCount)
|
||||
assert.Equal(t, 0.0, a.SuccessRate)
|
||||
assert.Empty(t, a.ToolCounts)
|
||||
assert.Empty(t, a.ErrorCounts)
|
||||
assert.Equal(t, 0, a.EstimatedInputTokens)
|
||||
assert.Equal(t, 0, a.EstimatedOutputTokens)
|
||||
}
|
||||
|
||||
func TestAnalyse_NilSession_Good(t *testing.T) {
|
||||
a := Analyse(nil)
|
||||
require.NotNil(t, a)
|
||||
assert.Equal(t, 0, a.EventCount)
|
||||
}
|
||||
|
||||
// TestAnalyse_SingleToolCall_Good covers the simplest non-trivial case:
// one successful tool call, so every metric has an exact expected value.
func TestAnalyse_SingleToolCall_Good(t *testing.T) {
	sess := &Session{
		ID:        "single",
		StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC),
		EndTime:   time.Date(2026, 2, 20, 10, 0, 5, 0, time.UTC),
		Events: []Event{
			{
				Timestamp: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC),
				Type:      "tool_use",
				Tool:      "Bash",
				Input:     "go test ./...",
				Output:    "PASS",
				Duration:  2 * time.Second,
				Success:   true,
			},
		},
	}

	a := Analyse(sess)

	assert.Equal(t, 5*time.Second, a.Duration) // EndTime - StartTime
	assert.Equal(t, 2*time.Second, a.ActiveTime) // the single call's duration
	assert.Equal(t, 1, a.EventCount)
	assert.Equal(t, 1.0, a.SuccessRate) // 1 of 1 calls succeeded
	assert.Equal(t, 1, a.ToolCounts["Bash"])
	assert.Equal(t, 0, a.ErrorCounts["Bash"])
	// With a single sample, average and maximum latency coincide.
	assert.Equal(t, 2*time.Second, a.AvgLatency["Bash"])
	assert.Equal(t, 2*time.Second, a.MaxLatency["Bash"])
}
|
||||
|
||||
// TestAnalyse_MixedToolsWithErrors_Good exercises the main aggregation
// paths: multiple tools, a mix of successes and failures, and non-tool
// events (user/assistant) that must be excluded from tool metrics.
func TestAnalyse_MixedToolsWithErrors_Good(t *testing.T) {
	sess := &Session{
		ID:        "mixed",
		StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC),
		EndTime:   time.Date(2026, 2, 20, 10, 5, 0, 0, time.UTC),
		Events: []Event{
			{
				Type:  "user",
				Input: "Please help",
			},
			{
				Type:     "tool_use",
				Tool:     "Bash",
				Input:    "ls -la",
				Output:   "total 42",
				Duration: 1 * time.Second,
				Success:  true,
			},
			{
				Type:     "tool_use",
				Tool:     "Bash",
				Input:    "cat /missing",
				Output:   "No such file",
				Duration: 500 * time.Millisecond,
				Success:  false,
				ErrorMsg: "No such file",
			},
			{
				Type:     "tool_use",
				Tool:     "Read",
				Input:    "/tmp/file.go",
				Output:   "package main",
				Duration: 200 * time.Millisecond,
				Success:  true,
			},
			{
				Type:     "tool_use",
				Tool:     "Read",
				Input:    "/tmp/missing.go",
				Output:   "file not found",
				Duration: 100 * time.Millisecond,
				Success:  false,
				ErrorMsg: "file not found",
			},
			{
				Type:     "tool_use",
				Tool:     "Edit",
				Input:    "/tmp/file.go (edit)",
				Output:   "ok",
				Duration: 300 * time.Millisecond,
				Success:  true,
			},
			{
				Type:  "assistant",
				Input: "All done.",
			},
		},
	}

	a := Analyse(sess)

	assert.Equal(t, 5*time.Minute, a.Duration)
	// EventCount includes the user and assistant events, not just tools.
	assert.Equal(t, 7, a.EventCount)

	// Tool counts
	assert.Equal(t, 2, a.ToolCounts["Bash"])
	assert.Equal(t, 2, a.ToolCounts["Read"])
	assert.Equal(t, 1, a.ToolCounts["Edit"])

	// Error counts
	assert.Equal(t, 1, a.ErrorCounts["Bash"])
	assert.Equal(t, 1, a.ErrorCounts["Read"])
	assert.Equal(t, 0, a.ErrorCounts["Edit"])

	// Success rate: 3 successes out of 5 tool calls = 0.6
	assert.InDelta(t, 0.6, a.SuccessRate, 0.001)

	// Active time: 1s + 500ms + 200ms + 100ms + 300ms = 2.1s
	assert.Equal(t, 2100*time.Millisecond, a.ActiveTime)
}
|
||||
|
||||
// TestAnalyse_LatencyCalculations_Good pins per-tool average and maximum
// latency with repeated calls to one tool and a single call to another.
func TestAnalyse_LatencyCalculations_Good(t *testing.T) {
	sess := &Session{
		ID:        "latency",
		StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC),
		EndTime:   time.Date(2026, 2, 20, 10, 1, 0, 0, time.UTC),
		Events: []Event{
			{
				Type:     "tool_use",
				Tool:     "Bash",
				Duration: 1 * time.Second,
				Success:  true,
			},
			{
				Type:     "tool_use",
				Tool:     "Bash",
				Duration: 3 * time.Second,
				Success:  true,
			},
			{
				Type:     "tool_use",
				Tool:     "Bash",
				Duration: 5 * time.Second,
				Success:  true,
			},
			{
				Type:     "tool_use",
				Tool:     "Read",
				Duration: 200 * time.Millisecond,
				Success:  true,
			},
		},
	}

	a := Analyse(sess)

	// Bash: avg = (1+3+5)/3 = 3s, max = 5s
	assert.Equal(t, 3*time.Second, a.AvgLatency["Bash"])
	assert.Equal(t, 5*time.Second, a.MaxLatency["Bash"])

	// Read: avg = 200ms, max = 200ms
	assert.Equal(t, 200*time.Millisecond, a.AvgLatency["Read"])
	assert.Equal(t, 200*time.Millisecond, a.MaxLatency["Read"])
}
|
||||
|
||||
// TestAnalyse_TokenEstimation_Good pins the chars/4 token heuristic,
// and confirms that all event types contribute to the input estimate
// while only tool Output contributes to the output estimate.
func TestAnalyse_TokenEstimation_Good(t *testing.T) {
	// 4 chars = ~1 token
	sess := &Session{
		ID:        "tokens",
		StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC),
		EndTime:   time.Date(2026, 2, 20, 10, 0, 1, 0, time.UTC),
		Events: []Event{
			{
				Type:  "user",
				Input: strings.Repeat("a", 400), // 100 tokens
			},
			{
				Type:     "tool_use",
				Tool:     "Bash",
				Input:    strings.Repeat("b", 80),  // 20 tokens
				Output:   strings.Repeat("c", 200), // 50 tokens
				Duration: time.Second,
				Success:  true,
			},
			{
				Type:  "assistant",
				Input: strings.Repeat("d", 120), // 30 tokens
			},
		},
	}

	a := Analyse(sess)

	// Input tokens: 400/4 + 80/4 + 120/4 = 100 + 20 + 30 = 150
	assert.Equal(t, 150, a.EstimatedInputTokens)
	// Output tokens: 0 + 200/4 + 0 = 50
	assert.Equal(t, 50, a.EstimatedOutputTokens)
}
|
||||
|
||||
// TestFormatAnalytics_Output_Good feeds a fully-populated analytics value
// through the formatter and checks for each headline figure and the
// per-tool breakdown table via substring matching (layout-agnostic).
func TestFormatAnalytics_Output_Good(t *testing.T) {
	a := &SessionAnalytics{
		Duration:              5 * time.Minute,
		ActiveTime:            2 * time.Minute,
		EventCount:            42,
		SuccessRate:           0.85,
		EstimatedInputTokens:  1500,
		EstimatedOutputTokens: 3000,
		ToolCounts: map[string]int{
			"Bash": 20,
			"Read": 15,
			"Edit": 7,
		},
		ErrorCounts: map[string]int{
			"Bash": 3,
		},
		AvgLatency: map[string]time.Duration{
			"Bash": 2 * time.Second,
			"Read": 500 * time.Millisecond,
			"Edit": 300 * time.Millisecond,
		},
		MaxLatency: map[string]time.Duration{
			"Bash": 10 * time.Second,
			"Read": 1 * time.Second,
			"Edit": 800 * time.Millisecond,
		},
	}

	output := FormatAnalytics(a)

	assert.Contains(t, output, "Session Analytics")
	assert.Contains(t, output, "5m0s") // duration
	assert.Contains(t, output, "2m0s") // active time
	assert.Contains(t, output, "42")   // event count
	assert.Contains(t, output, "85.0%")
	assert.Contains(t, output, "1500")
	assert.Contains(t, output, "3000")
	assert.Contains(t, output, "Bash")
	assert.Contains(t, output, "Read")
	assert.Contains(t, output, "Edit")
	assert.Contains(t, output, "Tool Breakdown")
}
|
||||
|
||||
func TestFormatAnalytics_EmptyAnalytics_Good(t *testing.T) {
|
||||
a := &SessionAnalytics{
|
||||
ToolCounts: make(map[string]int),
|
||||
ErrorCounts: make(map[string]int),
|
||||
AvgLatency: make(map[string]time.Duration),
|
||||
MaxLatency: make(map[string]time.Duration),
|
||||
}
|
||||
|
||||
output := FormatAnalytics(a)
|
||||
|
||||
assert.Contains(t, output, "Session Analytics")
|
||||
assert.Contains(t, output, "0.0%")
|
||||
// No tool breakdown section when no tools
|
||||
assert.NotContains(t, output, "Tool Breakdown")
|
||||
}
|
||||
|
|
@ -18,7 +18,7 @@ func BenchmarkParseTranscript(b *testing.B) {
|
|||
b.ReportAllocs()
|
||||
|
||||
for b.Loop() {
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
|
@ -37,7 +37,7 @@ func BenchmarkParseTranscript_Large(b *testing.B) {
|
|||
b.ReportAllocs()
|
||||
|
||||
for b.Loop() {
|
||||
_, err := ParseTranscript(path)
|
||||
_, _, err := ParseTranscript(path)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
|
|
|||
60
parser.go
60
parser.go
|
|
@ -97,6 +97,14 @@ type taskInput struct {
|
|||
SubagentType string `json:"subagent_type"`
|
||||
}
|
||||
|
||||
// ParseStats reports diagnostic information from a parse run.
type ParseStats struct {
	TotalLines        int      // lines read from the transcript, including blank and malformed ones
	SkippedLines      int      // lines dropped because they were not valid JSON
	OrphanedToolCalls int      // tool_use entries that never received a matching tool_result
	Warnings          []string // human-readable notes: bad-JSON previews, orphaned tool IDs, truncated final line
}
|
||||
|
||||
// ListSessions returns all sessions found in the Claude projects directory.
|
||||
func ListSessions(projectsDir string) ([]Session, error) {
|
||||
matches, err := filepath.Glob(filepath.Join(projectsDir, "*.jsonl"))
|
||||
|
|
@ -164,10 +172,10 @@ func ListSessions(projectsDir string) ([]Session, error) {
|
|||
}
|
||||
|
||||
// ParseTranscript reads a JSONL session file and returns structured events.
|
||||
func ParseTranscript(path string) (*Session, error) {
|
||||
func ParseTranscript(path string) (*Session, *ParseStats, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open transcript: %w", err)
|
||||
return nil, nil, fmt.Errorf("open transcript: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
|
|
@ -177,6 +185,8 @@ func ParseTranscript(path string) (*Session, error) {
|
|||
Path: path,
|
||||
}
|
||||
|
||||
stats := &ParseStats{}
|
||||
|
||||
// Collect tool_use entries keyed by ID
|
||||
type toolUse struct {
|
||||
timestamp time.Time
|
||||
|
|
@ -188,9 +198,32 @@ func ParseTranscript(path string) (*Session, error) {
|
|||
scanner := bufio.NewScanner(f)
|
||||
scanner.Buffer(make([]byte, 4*1024*1024), 4*1024*1024)
|
||||
|
||||
var lineNum int
|
||||
var lastRaw string
|
||||
var lastLineFailed bool
|
||||
|
||||
for scanner.Scan() {
|
||||
lineNum++
|
||||
stats.TotalLines++
|
||||
|
||||
raw := scanner.Text()
|
||||
if strings.TrimSpace(raw) == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
lastRaw = raw
|
||||
lastLineFailed = false
|
||||
|
||||
var entry rawEntry
|
||||
if err := json.Unmarshal(scanner.Bytes(), &entry); err != nil {
|
||||
if err := json.Unmarshal([]byte(raw), &entry); err != nil {
|
||||
stats.SkippedLines++
|
||||
preview := raw
|
||||
if len(preview) > 100 {
|
||||
preview = preview[:100]
|
||||
}
|
||||
stats.Warnings = append(stats.Warnings,
|
||||
fmt.Sprintf("line %d: skipped (bad JSON): %s", lineNum, preview))
|
||||
lastLineFailed = true
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
@ -281,7 +314,26 @@ func ParseTranscript(path string) (*Session, error) {
|
|||
}
|
||||
}
|
||||
|
||||
return sess, scanner.Err()
|
||||
// Detect truncated final line
|
||||
if lastLineFailed && lastRaw != "" {
|
||||
stats.Warnings = append(stats.Warnings, "truncated final line")
|
||||
}
|
||||
|
||||
// Check for scanner buffer errors
|
||||
if scanErr := scanner.Err(); scanErr != nil {
|
||||
return nil, stats, scanErr
|
||||
}
|
||||
|
||||
// Track orphaned tool calls (tool_use with no matching result)
|
||||
stats.OrphanedToolCalls = len(pendingTools)
|
||||
if stats.OrphanedToolCalls > 0 {
|
||||
for id := range pendingTools {
|
||||
stats.Warnings = append(stats.Warnings,
|
||||
fmt.Sprintf("orphaned tool call: %s", id))
|
||||
}
|
||||
}
|
||||
|
||||
return sess, stats, nil
|
||||
}
|
||||
|
||||
func extractToolInput(toolName string, raw json.RawMessage) string {
|
||||
|
|
|
|||
203
parser_test.go
203
parser_test.go
|
|
@ -117,7 +117,7 @@ func TestParseTranscript_MinimalValid_Good(t *testing.T) {
|
|||
assistantTextEntry(ts(1), "Hi there!"),
|
||||
)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, sess)
|
||||
|
||||
|
|
@ -187,7 +187,7 @@ func TestParseTranscript_ToolCalls_Good(t *testing.T) {
|
|||
|
||||
path := writeJSONL(t, dir, "tools.jsonl", lines...)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Count tool_use events
|
||||
|
|
@ -235,7 +235,7 @@ func TestParseTranscript_ToolError_Good(t *testing.T) {
|
|||
toolResultEntry(ts(1), "tool-err-1", "cat: /nonexistent: No such file or directory", true),
|
||||
)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
var toolEvents []Event
|
||||
|
|
@ -256,7 +256,7 @@ func TestParseTranscript_EmptyFile_Bad(t *testing.T) {
|
|||
// Write a truly empty file
|
||||
require.NoError(t, os.WriteFile(path, []byte(""), 0644))
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, sess)
|
||||
assert.Empty(t, sess.Events)
|
||||
|
|
@ -273,7 +273,7 @@ func TestParseTranscript_MalformedJSON_Bad(t *testing.T) {
|
|||
assistantTextEntry(ts(2), "This is also valid"),
|
||||
)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err, "malformed lines should be skipped, not cause an error")
|
||||
require.NotNil(t, sess)
|
||||
|
||||
|
|
@ -292,7 +292,7 @@ func TestParseTranscript_TruncatedJSONL_Bad(t *testing.T) {
|
|||
|
||||
path := writeJSONL(t, dir, "truncated.jsonl", validLine, truncated)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err, "truncated last line should be skipped gracefully")
|
||||
require.NotNil(t, sess)
|
||||
|
||||
|
|
@ -322,7 +322,7 @@ func TestParseTranscript_LargeSession_Good(t *testing.T) {
|
|||
|
||||
path := writeJSONL(t, dir, "large.jsonl", lines...)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
var toolCount int
|
||||
|
|
@ -368,7 +368,7 @@ func TestParseTranscript_NestedToolResults_Good(t *testing.T) {
|
|||
|
||||
path := writeJSONL(t, dir, "nested.jsonl", lines...)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
var toolEvents []Event
|
||||
|
|
@ -413,7 +413,7 @@ func TestParseTranscript_NestedMapResult_Good(t *testing.T) {
|
|||
|
||||
path := writeJSONL(t, dir, "map-result.jsonl", lines...)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
var toolEvents []Event
|
||||
|
|
@ -428,7 +428,7 @@ func TestParseTranscript_NestedMapResult_Good(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestParseTranscript_FileNotFound_Ugly(t *testing.T) {
|
||||
_, err := ParseTranscript("/nonexistent/path/session.jsonl")
|
||||
_, _, err := ParseTranscript("/nonexistent/path/session.jsonl")
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "open transcript")
|
||||
}
|
||||
|
|
@ -439,7 +439,7 @@ func TestParseTranscript_SessionIDFromFilename_Good(t *testing.T) {
|
|||
userTextEntry(ts(0), "test"),
|
||||
)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "abc123def456", sess.ID)
|
||||
}
|
||||
|
|
@ -452,7 +452,7 @@ func TestParseTranscript_TimestampsTracked_Good(t *testing.T) {
|
|||
userTextEntry(ts(10), "end"),
|
||||
)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
expectedStart, _ := time.Parse(time.RFC3339Nano, ts(0))
|
||||
|
|
@ -469,7 +469,7 @@ func TestParseTranscript_TextTruncation_Good(t *testing.T) {
|
|||
userTextEntry(ts(0), longText),
|
||||
)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Len(t, sess.Events, 1)
|
||||
|
|
@ -506,7 +506,7 @@ func TestParseTranscript_MixedContentBlocks_Good(t *testing.T) {
|
|||
|
||||
path := writeJSONL(t, dir, "mixed.jsonl", lines...)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should have an assistant text event + a tool_use event
|
||||
|
|
@ -524,7 +524,7 @@ func TestParseTranscript_UnmatchedToolResult_Bad(t *testing.T) {
|
|||
userTextEntry(ts(1), "Normal message"),
|
||||
)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Only the user text event should appear; the orphan tool result is ignored
|
||||
|
|
@ -548,7 +548,7 @@ func TestParseTranscript_EmptyTimestamp_Bad(t *testing.T) {
|
|||
})
|
||||
path := writeJSONL(t, dir, "no-ts.jsonl", line)
|
||||
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
require.NoError(t, err)
|
||||
|
||||
// The event should still be parsed, but StartTime remains zero
|
||||
|
|
@ -781,3 +781,174 @@ func TestFormatDuration_Good(t *testing.T) {
|
|||
assert.Equal(t, "2m30s", formatDuration(2*time.Minute+30*time.Second))
|
||||
assert.Equal(t, "1h5m", formatDuration(1*time.Hour+5*time.Minute))
|
||||
}
|
||||
|
||||
// --- ParseStats tests ---
|
||||
|
||||
// TestParseStats_CleanJSONL_Good: a well-formed transcript produces
// zeroed diagnostics — no skips, no orphans, no warnings.
func TestParseStats_CleanJSONL_Good(t *testing.T) {
	dir := t.TempDir()
	path := writeJSONL(t, dir, "clean.jsonl",
		userTextEntry(ts(0), "Hello"),
		toolUseEntry(ts(1), "Bash", "tool-1", map[string]interface{}{
			"command": "ls",
		}),
		toolResultEntry(ts(2), "tool-1", "ok", false),
		assistantTextEntry(ts(3), "Done"),
	)

	_, stats, err := ParseTranscript(path)
	require.NoError(t, err)
	require.NotNil(t, stats)

	assert.Equal(t, 4, stats.TotalLines)
	assert.Equal(t, 0, stats.SkippedLines)
	assert.Equal(t, 0, stats.OrphanedToolCalls)
	assert.Empty(t, stats.Warnings)
}
|
||||
|
||||
// TestParseStats_MalformedLines_Good: malformed lines interleaved with
// valid ones are counted as skipped, and each skip produces a warning.
func TestParseStats_MalformedLines_Good(t *testing.T) {
	dir := t.TempDir()
	path := writeJSONL(t, dir, "malformed-stats.jsonl",
		`{bad json line one`,
		userTextEntry(ts(0), "Valid line"),
		`{another bad line}}}`,
		`not even close to json`,
		assistantTextEntry(ts(1), "Also valid"),
	)

	_, stats, err := ParseTranscript(path)
	require.NoError(t, err)
	require.NotNil(t, stats)

	assert.Equal(t, 5, stats.TotalLines)
	assert.Equal(t, 3, stats.SkippedLines)
	assert.Len(t, stats.Warnings, 3)

	// Each warning should contain line number and preview
	for _, w := range stats.Warnings {
		assert.Contains(t, w, "skipped (bad JSON)")
	}
}
|
||||
|
||||
// TestParseStats_OrphanedToolCalls_Good: tool_use entries with no
// matching tool_result are counted and warned about individually.
func TestParseStats_OrphanedToolCalls_Good(t *testing.T) {
	dir := t.TempDir()
	// Two tool_use entries with no matching tool_result
	path := writeJSONL(t, dir, "orphaned.jsonl",
		toolUseEntry(ts(0), "Bash", "orphan-1", map[string]interface{}{
			"command": "ls",
		}),
		toolUseEntry(ts(1), "Read", "orphan-2", map[string]interface{}{
			"file_path": "/tmp/file.go",
		}),
		assistantTextEntry(ts(2), "Never got results"),
	)

	_, stats, err := ParseTranscript(path)
	require.NoError(t, err)
	require.NotNil(t, stats)

	assert.Equal(t, 2, stats.OrphanedToolCalls)

	// Warnings should mention orphaned tool IDs
	var orphanWarnings int
	for _, w := range stats.Warnings {
		if strings.Contains(w, "orphaned tool call") {
			orphanWarnings++
		}
	}
	assert.Equal(t, 2, orphanWarnings)
}
|
||||
|
||||
// TestParseStats_TruncatedFinalLine_Good: a file whose last line is cut
// off mid-JSON is skipped and flagged with a "truncated final line" warning.
func TestParseStats_TruncatedFinalLine_Good(t *testing.T) {
	dir := t.TempDir()
	validLine := userTextEntry(ts(0), "Hello")
	truncatedLine := `{"type":"assi`

	// Write without trailing newline after truncated line
	path := filepath.Join(dir, "truncfinal.jsonl")
	require.NoError(t, os.WriteFile(path, []byte(validLine+"\n"+truncatedLine+"\n"), 0644))

	_, stats, err := ParseTranscript(path)
	require.NoError(t, err)
	require.NotNil(t, stats)

	assert.Equal(t, 1, stats.SkippedLines)

	// Should detect truncated final line
	var foundTruncated bool
	for _, w := range stats.Warnings {
		if strings.Contains(w, "truncated final line") {
			foundTruncated = true
		}
	}
	assert.True(t, foundTruncated, "should detect truncated final line")
}
|
||||
|
||||
// TestParseStats_FileEndingMidJSON_Good: same truncation detection, but
// with a final line that starts as plausible JSON and stops mid-object.
func TestParseStats_FileEndingMidJSON_Good(t *testing.T) {
	dir := t.TempDir()
	validLine := userTextEntry(ts(0), "Hello")
	midJSON := `{"type":"assistant","timestamp":"2026-02-20T10:00:01Z","sessionId":"test","message":{"role":"assi`

	path := filepath.Join(dir, "midjson.jsonl")
	require.NoError(t, os.WriteFile(path, []byte(validLine+"\n"+midJSON+"\n"), 0644))

	sess, stats, err := ParseTranscript(path)
	require.NoError(t, err)
	require.NotNil(t, sess)
	require.NotNil(t, stats)

	assert.Equal(t, 1, stats.SkippedLines)

	var foundTruncated bool
	for _, w := range stats.Warnings {
		if strings.Contains(w, "truncated final line") {
			foundTruncated = true
		}
	}
	assert.True(t, foundTruncated)
}
|
||||
|
||||
// TestParseStats_CompleteFileNoTrailingNewline_Good: a valid final line
// without a trailing newline must NOT be reported as truncated.
func TestParseStats_CompleteFileNoTrailingNewline_Good(t *testing.T) {
	dir := t.TempDir()
	line := userTextEntry(ts(0), "Hello")

	// Write without trailing newline — should still parse fine
	path := filepath.Join(dir, "nonewline.jsonl")
	require.NoError(t, os.WriteFile(path, []byte(line), 0644))

	sess, stats, err := ParseTranscript(path)
	require.NoError(t, err)
	require.NotNil(t, sess)
	require.NotNil(t, stats)

	assert.Equal(t, 0, stats.SkippedLines)
	assert.Equal(t, 0, stats.OrphanedToolCalls)
	assert.Len(t, sess.Events, 1)

	// No truncation warning since the line parsed successfully
	var foundTruncated bool
	for _, w := range stats.Warnings {
		if strings.Contains(w, "truncated final line") {
			foundTruncated = true
		}
	}
	assert.False(t, foundTruncated)
}
|
||||
|
||||
// TestParseStats_WarningPreviewTruncated_Good: bad-JSON warnings embed at
// most ~100 characters of the offending line, so very long lines do not
// bloat the warning list.
func TestParseStats_WarningPreviewTruncated_Good(t *testing.T) {
	dir := t.TempDir()
	// A malformed line longer than 100 chars
	longBadLine := `{` + strings.Repeat("x", 200)
	path := writeJSONL(t, dir, "longbad.jsonl",
		longBadLine,
		userTextEntry(ts(0), "Valid"),
	)

	_, stats, err := ParseTranscript(path)
	require.NoError(t, err)

	require.Len(t, stats.Warnings, 1) // 1 skipped line (last line is valid, no truncation)
	// The preview in the warning should be at most ~100 chars of the bad line
	assert.True(t, len(stats.Warnings[0]) < 200,
		"warning preview should be truncated for long lines")
	assert.Contains(t, stats.Warnings[0], "line 1:")
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ func Search(projectsDir, query string) ([]SearchResult, error) {
|
|||
query = strings.ToLower(query)
|
||||
|
||||
for _, path := range matches {
|
||||
sess, err := ParseTranscript(path)
|
||||
sess, _, err := ParseTranscript(path)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue