diff --git a/analytics.go b/analytics.go
new file mode 100644
index 0000000..73c62db
--- /dev/null
+++ b/analytics.go
@@ -0,0 +1,152 @@
+// SPDX-Licence-Identifier: EUPL-1.2
+package session
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+	"time"
+)
+
+// SessionAnalytics holds computed metrics for a parsed session.
+type SessionAnalytics struct {
+	Duration              time.Duration            // EndTime minus StartTime; left zero when either is unset
+	ActiveTime            time.Duration            // sum of Duration over tool_use events
+	EventCount            int                      // every event in the session, not only tool calls
+	ToolCounts            map[string]int           // tool_use events per tool name
+	ErrorCounts           map[string]int           // unsuccessful tool_use events per tool name
+	SuccessRate           float64                  // successful / total tool calls; 0 when there were none
+	AvgLatency            map[string]time.Duration // mean tool_use Duration per tool name
+	MaxLatency            map[string]time.Duration // largest tool_use Duration per tool name
+	EstimatedInputTokens  int                      // sum of len(Input)/4 over all events
+	EstimatedOutputTokens int                      // sum of len(Output)/4 over all events
+}
+
+// Analyse iterates session events and computes analytics. Pure function, no I/O.
+//
+// A nil session yields an empty, non-nil result whose maps are initialised, so
+// callers can index and format it without nil checks. Only events of type
+// "tool_use" contribute to tool counts, error counts, active time and latency;
+// token estimates cover every event.
+func Analyse(sess *Session) *SessionAnalytics {
+	a := &SessionAnalytics{
+		ToolCounts:  make(map[string]int),
+		ErrorCounts: make(map[string]int),
+		AvgLatency:  make(map[string]time.Duration),
+		MaxLatency:  make(map[string]time.Duration),
+	}
+
+	if sess == nil {
+		return a
+	}
+
+	// A session parsed without timestamps keeps a zero StartTime or EndTime;
+	// subtracting those would report a saturated, meaningless duration.
+	if !sess.StartTime.IsZero() && !sess.EndTime.IsZero() {
+		a.Duration = sess.EndTime.Sub(sess.StartTime)
+	}
+	a.EventCount = len(sess.Events)
+
+	// Per-tool running totals used to derive the averages afterwards.
+	type latencyAccum struct {
+		total time.Duration
+		count int
+	}
+	latencies := make(map[string]*latencyAccum)
+
+	var totalToolCalls, totalErrors int
+
+	for _, evt := range sess.Events {
+		// Token estimation: ~4 chars per token
+		a.EstimatedInputTokens += len(evt.Input) / 4
+		a.EstimatedOutputTokens += len(evt.Output) / 4
+
+		if evt.Type != "tool_use" {
+			continue
+		}
+
+		totalToolCalls++
+		a.ToolCounts[evt.Tool]++
+
+		if !evt.Success {
+			totalErrors++
+			a.ErrorCounts[evt.Tool]++
+		}
+
+		// Active time: sum of tool call durations
+		a.ActiveTime += evt.Duration
+
+		// Latency tracking: a single map lookup per event.
+		acc, ok := latencies[evt.Tool]
+		if !ok {
+			acc = &latencyAccum{}
+			latencies[evt.Tool] = acc
+		}
+		acc.total += evt.Duration
+		acc.count++
+
+		if evt.Duration > a.MaxLatency[evt.Tool] {
+			a.MaxLatency[evt.Tool] = evt.Duration
+		}
+	}
+
+	// Every accumulator was created by at least one event, so count >= 1.
+	for tool, acc := range latencies {
+		a.AvgLatency[tool] = acc.total / time.Duration(acc.count)
+	}
+
+	// Success rate stays 0 when there were no tool calls (avoids 0/0).
+	if totalToolCalls > 0 {
+		a.SuccessRate = float64(totalToolCalls-totalErrors) / float64(totalToolCalls)
+	}
+
+	return a
+}
+
+// FormatAnalytics returns a tabular text summary suitable for CLI display.
+//
+// The headline metrics are always printed; the per-tool table is appended only
+// when at least one tool was called, with tools listed in sorted order so the
+// output is deterministic. A nil a is formatted as empty analytics.
+func FormatAnalytics(a *SessionAnalytics) string {
+	if a == nil {
+		a = &SessionAnalytics{}
+	}
+
+	var b strings.Builder
+
+	b.WriteString("Session Analytics\n")
+	b.WriteString(strings.Repeat("=", 50) + "\n\n")
+
+	fmt.Fprintf(&b, " Duration: %s\n", formatDuration(a.Duration))
+	fmt.Fprintf(&b, " Active Time: %s\n", formatDuration(a.ActiveTime))
+	fmt.Fprintf(&b, " Events: %d\n", a.EventCount)
+	fmt.Fprintf(&b, " Success Rate: %.1f%%\n", a.SuccessRate*100)
+	fmt.Fprintf(&b, " Est. Input Tk: %d\n", a.EstimatedInputTokens)
+	fmt.Fprintf(&b, " Est. Output Tk: %d\n", a.EstimatedOutputTokens)
+
+	if len(a.ToolCounts) > 0 {
+		b.WriteString("\n Tool Breakdown\n")
+		b.WriteString(" " + strings.Repeat("-", 48) + "\n")
+		fmt.Fprintf(&b, " %-14s %6s %6s %10s %10s\n",
+			"Tool", "Calls", "Errors", "Avg", "Max")
+		b.WriteString(" " + strings.Repeat("-", 48) + "\n")
+
+		// Sort tools for deterministic output
+		tools := make([]string, 0, len(a.ToolCounts))
+		for t := range a.ToolCounts {
+			tools = append(tools, t)
+		}
+		sort.Strings(tools)
+
+		for _, tool := range tools {
+			// Missing map entries read as zero, so a tool without errors or
+			// recorded latency still prints a complete row.
+			fmt.Fprintf(&b, " %-14s %6d %6d %10s %10s\n",
+				tool, a.ToolCounts[tool], a.ErrorCounts[tool],
+				formatDuration(a.AvgLatency[tool]), formatDuration(a.MaxLatency[tool]))
+		}
+	}
+
+	return b.String()
+}
diff --git a/analytics_test.go b/analytics_test.go
new file mode 100644
index 0000000..e1ae95c
--- /dev/null
+++ b/analytics_test.go
@@ -0,0 +1,286 @@
+// SPDX-Licence-Identifier: EUPL-1.2
+package session
+
+import (
+	"strings"
+	"testing"
+	"time"
+
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAnalyse_EmptySession_Good(t *testing.T) { + sess := &Session{ + ID: "empty", + StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC), + EndTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC), + Events: nil, + } + + a := Analyse(sess) + require.NotNil(t, a) + + assert.Equal(t, time.Duration(0), a.Duration) + assert.Equal(t, time.Duration(0), a.ActiveTime) + assert.Equal(t, 0, a.EventCount) + assert.Equal(t, 0.0, a.SuccessRate) + assert.Empty(t, a.ToolCounts) + assert.Empty(t, a.ErrorCounts) + assert.Equal(t, 0, a.EstimatedInputTokens) + assert.Equal(t, 0, a.EstimatedOutputTokens) +} + +func TestAnalyse_NilSession_Good(t *testing.T) { + a := Analyse(nil) + require.NotNil(t, a) + assert.Equal(t, 0, a.EventCount) +} + +func TestAnalyse_SingleToolCall_Good(t *testing.T) { + sess := &Session{ + ID: "single", + StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC), + EndTime: time.Date(2026, 2, 20, 10, 0, 5, 0, time.UTC), + Events: []Event{ + { + Timestamp: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC), + Type: "tool_use", + Tool: "Bash", + Input: "go test ./...", + Output: "PASS", + Duration: 2 * time.Second, + Success: true, + }, + }, + } + + a := Analyse(sess) + + assert.Equal(t, 5*time.Second, a.Duration) + assert.Equal(t, 2*time.Second, a.ActiveTime) + assert.Equal(t, 1, a.EventCount) + assert.Equal(t, 1.0, a.SuccessRate) + assert.Equal(t, 1, a.ToolCounts["Bash"]) + assert.Equal(t, 0, a.ErrorCounts["Bash"]) + assert.Equal(t, 2*time.Second, a.AvgLatency["Bash"]) + assert.Equal(t, 2*time.Second, a.MaxLatency["Bash"]) +} + +func TestAnalyse_MixedToolsWithErrors_Good(t *testing.T) { + sess := &Session{ + ID: "mixed", + StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC), + EndTime: time.Date(2026, 2, 20, 10, 5, 0, 0, time.UTC), + Events: []Event{ + { + Type: "user", + Input: "Please help", + }, + { + Type: "tool_use", + Tool: "Bash", + Input: "ls -la", + Output: "total 
42", + Duration: 1 * time.Second, + Success: true, + }, + { + Type: "tool_use", + Tool: "Bash", + Input: "cat /missing", + Output: "No such file", + Duration: 500 * time.Millisecond, + Success: false, + ErrorMsg: "No such file", + }, + { + Type: "tool_use", + Tool: "Read", + Input: "/tmp/file.go", + Output: "package main", + Duration: 200 * time.Millisecond, + Success: true, + }, + { + Type: "tool_use", + Tool: "Read", + Input: "/tmp/missing.go", + Output: "file not found", + Duration: 100 * time.Millisecond, + Success: false, + ErrorMsg: "file not found", + }, + { + Type: "tool_use", + Tool: "Edit", + Input: "/tmp/file.go (edit)", + Output: "ok", + Duration: 300 * time.Millisecond, + Success: true, + }, + { + Type: "assistant", + Input: "All done.", + }, + }, + } + + a := Analyse(sess) + + assert.Equal(t, 5*time.Minute, a.Duration) + assert.Equal(t, 7, a.EventCount) + + // Tool counts + assert.Equal(t, 2, a.ToolCounts["Bash"]) + assert.Equal(t, 2, a.ToolCounts["Read"]) + assert.Equal(t, 1, a.ToolCounts["Edit"]) + + // Error counts + assert.Equal(t, 1, a.ErrorCounts["Bash"]) + assert.Equal(t, 1, a.ErrorCounts["Read"]) + assert.Equal(t, 0, a.ErrorCounts["Edit"]) + + // Success rate: 3 successes out of 5 tool calls = 0.6 + assert.InDelta(t, 0.6, a.SuccessRate, 0.001) + + // Active time: 1s + 500ms + 200ms + 100ms + 300ms = 2.1s + assert.Equal(t, 2100*time.Millisecond, a.ActiveTime) +} + +func TestAnalyse_LatencyCalculations_Good(t *testing.T) { + sess := &Session{ + ID: "latency", + StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC), + EndTime: time.Date(2026, 2, 20, 10, 1, 0, 0, time.UTC), + Events: []Event{ + { + Type: "tool_use", + Tool: "Bash", + Duration: 1 * time.Second, + Success: true, + }, + { + Type: "tool_use", + Tool: "Bash", + Duration: 3 * time.Second, + Success: true, + }, + { + Type: "tool_use", + Tool: "Bash", + Duration: 5 * time.Second, + Success: true, + }, + { + Type: "tool_use", + Tool: "Read", + Duration: 200 * time.Millisecond, + 
Success: true, + }, + }, + } + + a := Analyse(sess) + + // Bash: avg = (1+3+5)/3 = 3s, max = 5s + assert.Equal(t, 3*time.Second, a.AvgLatency["Bash"]) + assert.Equal(t, 5*time.Second, a.MaxLatency["Bash"]) + + // Read: avg = 200ms, max = 200ms + assert.Equal(t, 200*time.Millisecond, a.AvgLatency["Read"]) + assert.Equal(t, 200*time.Millisecond, a.MaxLatency["Read"]) +} + +func TestAnalyse_TokenEstimation_Good(t *testing.T) { + // 4 chars = ~1 token + sess := &Session{ + ID: "tokens", + StartTime: time.Date(2026, 2, 20, 10, 0, 0, 0, time.UTC), + EndTime: time.Date(2026, 2, 20, 10, 0, 1, 0, time.UTC), + Events: []Event{ + { + Type: "user", + Input: strings.Repeat("a", 400), // 100 tokens + }, + { + Type: "tool_use", + Tool: "Bash", + Input: strings.Repeat("b", 80), // 20 tokens + Output: strings.Repeat("c", 200), // 50 tokens + Duration: time.Second, + Success: true, + }, + { + Type: "assistant", + Input: strings.Repeat("d", 120), // 30 tokens + }, + }, + } + + a := Analyse(sess) + + // Input tokens: 400/4 + 80/4 + 120/4 = 100 + 20 + 30 = 150 + assert.Equal(t, 150, a.EstimatedInputTokens) + // Output tokens: 0 + 200/4 + 0 = 50 + assert.Equal(t, 50, a.EstimatedOutputTokens) +} + +func TestFormatAnalytics_Output_Good(t *testing.T) { + a := &SessionAnalytics{ + Duration: 5 * time.Minute, + ActiveTime: 2 * time.Minute, + EventCount: 42, + SuccessRate: 0.85, + EstimatedInputTokens: 1500, + EstimatedOutputTokens: 3000, + ToolCounts: map[string]int{ + "Bash": 20, + "Read": 15, + "Edit": 7, + }, + ErrorCounts: map[string]int{ + "Bash": 3, + }, + AvgLatency: map[string]time.Duration{ + "Bash": 2 * time.Second, + "Read": 500 * time.Millisecond, + "Edit": 300 * time.Millisecond, + }, + MaxLatency: map[string]time.Duration{ + "Bash": 10 * time.Second, + "Read": 1 * time.Second, + "Edit": 800 * time.Millisecond, + }, + } + + output := FormatAnalytics(a) + + assert.Contains(t, output, "Session Analytics") + assert.Contains(t, output, "5m0s") + assert.Contains(t, output, "2m0s") + 
assert.Contains(t, output, "42") + assert.Contains(t, output, "85.0%") + assert.Contains(t, output, "1500") + assert.Contains(t, output, "3000") + assert.Contains(t, output, "Bash") + assert.Contains(t, output, "Read") + assert.Contains(t, output, "Edit") + assert.Contains(t, output, "Tool Breakdown") +} + +func TestFormatAnalytics_EmptyAnalytics_Good(t *testing.T) { + a := &SessionAnalytics{ + ToolCounts: make(map[string]int), + ErrorCounts: make(map[string]int), + AvgLatency: make(map[string]time.Duration), + MaxLatency: make(map[string]time.Duration), + } + + output := FormatAnalytics(a) + + assert.Contains(t, output, "Session Analytics") + assert.Contains(t, output, "0.0%") + // No tool breakdown section when no tools + assert.NotContains(t, output, "Tool Breakdown") +} diff --git a/bench_test.go b/bench_test.go index e49ef2c..2a465aa 100644 --- a/bench_test.go +++ b/bench_test.go @@ -18,7 +18,7 @@ func BenchmarkParseTranscript(b *testing.B) { b.ReportAllocs() for b.Loop() { - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) if err != nil { b.Fatal(err) } @@ -37,7 +37,7 @@ func BenchmarkParseTranscript_Large(b *testing.B) { b.ReportAllocs() for b.Loop() { - _, err := ParseTranscript(path) + _, _, err := ParseTranscript(path) if err != nil { b.Fatal(err) } diff --git a/parser.go b/parser.go index 6304189..7500523 100644 --- a/parser.go +++ b/parser.go @@ -97,6 +97,14 @@ type taskInput struct { SubagentType string `json:"subagent_type"` } +// ParseStats reports diagnostic information from a parse run. +type ParseStats struct { + TotalLines int + SkippedLines int + OrphanedToolCalls int + Warnings []string +} + // ListSessions returns all sessions found in the Claude projects directory. 
func ListSessions(projectsDir string) ([]Session, error) {
 	matches, err := filepath.Glob(filepath.Join(projectsDir, "*.jsonl"))
@@ -164,10 +172,10 @@ func ListSessions(projectsDir string) ([]Session, error) {
 }
 
 // ParseTranscript reads a JSONL session file and returns structured events.
-func ParseTranscript(path string) (*Session, error) {
+func ParseTranscript(path string) (*Session, *ParseStats, error) {
 	f, err := os.Open(path)
 	if err != nil {
-		return nil, fmt.Errorf("open transcript: %w", err)
+		return nil, nil, fmt.Errorf("open transcript: %w", err)
 	}
 	defer f.Close()
 
@@ -177,6 +185,8 @@ func ParseTranscript(path string) (*Session, error) {
 		Path: path,
 	}
 
+	stats := &ParseStats{}
+
 	// Collect tool_use entries keyed by ID
 	type toolUse struct {
 		timestamp time.Time
@@ -188,9 +198,31 @@ func ParseTranscript(path string) (*Session, error) {
 	scanner := bufio.NewScanner(f)
 	scanner.Buffer(make([]byte, 4*1024*1024), 4*1024*1024)
 
+	var lineNum int
+	var lastLineFailed bool
+
 	for scanner.Scan() {
+		lineNum++
+		stats.TotalLines++
+
+		raw := scanner.Text()
+		if strings.TrimSpace(raw) == "" {
+			continue
+		}
+
+		lastLineFailed = false
+
 		var entry rawEntry
 		if err := json.Unmarshal(scanner.Bytes(), &entry); err != nil {
+			stats.SkippedLines++
+			preview := raw
+			if len(preview) > 100 {
+				// Cut at a byte offset, then drop any rune split by the cut so the warning stays valid UTF-8.
+				preview = strings.ToValidUTF8(preview[:100], "")
+			}
+			stats.Warnings = append(stats.Warnings,
+				fmt.Sprintf("line %d: skipped (bad JSON): %s", lineNum, preview))
+			lastLineFailed = true
 			continue
 		}
 
@@ -281,7 +313,26 @@ func ParseTranscript(path string) (*Session, error) {
 		}
 	}
 
-	return sess, scanner.Err()
+	// The last non-blank line failed to parse: report it as a truncated final line
+	if lastLineFailed {
+		stats.Warnings = append(stats.Warnings, "truncated final line")
+	}
+
+	// Check for scanner buffer errors
+	if scanErr := scanner.Err(); scanErr != nil {
+		return nil, stats, fmt.Errorf("scan transcript: %w", scanErr)
+	}
+
+	// Track orphaned tool calls (tool_use with no matching result)
+	stats.OrphanedToolCalls = len(pendingTools)
+	if stats.OrphanedToolCalls > 0 {
+		for id := range pendingTools {
+			stats.Warnings = append(stats.Warnings,
+				fmt.Sprintf("orphaned tool call: %s", id))
+		}
+	}
+
+	return sess, stats, nil
 }
 
 func extractToolInput(toolName string, raw json.RawMessage) string {
diff --git a/parser_test.go b/parser_test.go
index 93013cf..fbc9268 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -117,7 +117,7 @@ func TestParseTranscript_MinimalValid_Good(t *testing.T) {
 		assistantTextEntry(ts(1), "Hi there!"),
 	)
 
-	sess, err := ParseTranscript(path)
+	sess, _, err := ParseTranscript(path)
 	require.NoError(t, err)
 	require.NotNil(t, sess)
 
@@ -187,7 +187,7 @@ func TestParseTranscript_ToolCalls_Good(t *testing.T) {
 
 	path := writeJSONL(t, dir, "tools.jsonl", lines...)
 
-	sess, err := ParseTranscript(path)
+	sess, _, err := ParseTranscript(path)
 	require.NoError(t, err)
 
 	// Count tool_use events
@@ -235,7 +235,7 @@ func TestParseTranscript_ToolError_Good(t *testing.T) {
 		toolResultEntry(ts(1), "tool-err-1", "cat: /nonexistent: No such file or directory", true),
 	)
 
-	sess, err := ParseTranscript(path)
+	sess, _, err := ParseTranscript(path)
 	require.NoError(t, err)
 
 	var toolEvents []Event
@@ -256,7 +256,7 @@ func TestParseTranscript_EmptyFile_Bad(t *testing.T) {
 	// Write a truly empty file
 	require.NoError(t, os.WriteFile(path, []byte(""), 0644))
 
-	sess, err := ParseTranscript(path)
+	sess, _, err := ParseTranscript(path)
 	require.NoError(t, err)
 	require.NotNil(t, sess)
 	assert.Empty(t, sess.Events)
@@ -273,7 +273,7 @@ func TestParseTranscript_MalformedJSON_Bad(t *testing.T) {
 		assistantTextEntry(ts(2), "This is also valid"),
 	)
 
-	sess, err := ParseTranscript(path)
+	sess, _, err := ParseTranscript(path)
 	require.NoError(t, err, "malformed lines should be skipped, not cause an error")
 	require.NotNil(t, sess)
 
@@ -292,7 +292,7 @@ func TestParseTranscript_TruncatedJSONL_Bad(t *testing.T) {
 
 	path := writeJSONL(t, dir, "truncated.jsonl", validLine,
truncated) - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) require.NoError(t, err, "truncated last line should be skipped gracefully") require.NotNil(t, sess) @@ -322,7 +322,7 @@ func TestParseTranscript_LargeSession_Good(t *testing.T) { path := writeJSONL(t, dir, "large.jsonl", lines...) - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) require.NoError(t, err) var toolCount int @@ -368,7 +368,7 @@ func TestParseTranscript_NestedToolResults_Good(t *testing.T) { path := writeJSONL(t, dir, "nested.jsonl", lines...) - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) require.NoError(t, err) var toolEvents []Event @@ -413,7 +413,7 @@ func TestParseTranscript_NestedMapResult_Good(t *testing.T) { path := writeJSONL(t, dir, "map-result.jsonl", lines...) - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) require.NoError(t, err) var toolEvents []Event @@ -428,7 +428,7 @@ func TestParseTranscript_NestedMapResult_Good(t *testing.T) { } func TestParseTranscript_FileNotFound_Ugly(t *testing.T) { - _, err := ParseTranscript("/nonexistent/path/session.jsonl") + _, _, err := ParseTranscript("/nonexistent/path/session.jsonl") require.Error(t, err) assert.Contains(t, err.Error(), "open transcript") } @@ -439,7 +439,7 @@ func TestParseTranscript_SessionIDFromFilename_Good(t *testing.T) { userTextEntry(ts(0), "test"), ) - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) require.NoError(t, err) assert.Equal(t, "abc123def456", sess.ID) } @@ -452,7 +452,7 @@ func TestParseTranscript_TimestampsTracked_Good(t *testing.T) { userTextEntry(ts(10), "end"), ) - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) require.NoError(t, err) expectedStart, _ := time.Parse(time.RFC3339Nano, ts(0)) @@ -469,7 +469,7 @@ func TestParseTranscript_TextTruncation_Good(t *testing.T) { userTextEntry(ts(0), longText), ) - sess, err := ParseTranscript(path) + 
sess, _, err := ParseTranscript(path) require.NoError(t, err) require.Len(t, sess.Events, 1) @@ -506,7 +506,7 @@ func TestParseTranscript_MixedContentBlocks_Good(t *testing.T) { path := writeJSONL(t, dir, "mixed.jsonl", lines...) - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) require.NoError(t, err) // Should have an assistant text event + a tool_use event @@ -524,7 +524,7 @@ func TestParseTranscript_UnmatchedToolResult_Bad(t *testing.T) { userTextEntry(ts(1), "Normal message"), ) - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) require.NoError(t, err) // Only the user text event should appear; the orphan tool result is ignored @@ -548,7 +548,7 @@ func TestParseTranscript_EmptyTimestamp_Bad(t *testing.T) { }) path := writeJSONL(t, dir, "no-ts.jsonl", line) - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) require.NoError(t, err) // The event should still be parsed, but StartTime remains zero @@ -781,3 +781,174 @@ func TestFormatDuration_Good(t *testing.T) { assert.Equal(t, "2m30s", formatDuration(2*time.Minute+30*time.Second)) assert.Equal(t, "1h5m", formatDuration(1*time.Hour+5*time.Minute)) } + +// --- ParseStats tests --- + +func TestParseStats_CleanJSONL_Good(t *testing.T) { + dir := t.TempDir() + path := writeJSONL(t, dir, "clean.jsonl", + userTextEntry(ts(0), "Hello"), + toolUseEntry(ts(1), "Bash", "tool-1", map[string]interface{}{ + "command": "ls", + }), + toolResultEntry(ts(2), "tool-1", "ok", false), + assistantTextEntry(ts(3), "Done"), + ) + + _, stats, err := ParseTranscript(path) + require.NoError(t, err) + require.NotNil(t, stats) + + assert.Equal(t, 4, stats.TotalLines) + assert.Equal(t, 0, stats.SkippedLines) + assert.Equal(t, 0, stats.OrphanedToolCalls) + assert.Empty(t, stats.Warnings) +} + +func TestParseStats_MalformedLines_Good(t *testing.T) { + dir := t.TempDir() + path := writeJSONL(t, dir, "malformed-stats.jsonl", + `{bad json line one`, + 
userTextEntry(ts(0), "Valid line"), + `{another bad line}}}`, + `not even close to json`, + assistantTextEntry(ts(1), "Also valid"), + ) + + _, stats, err := ParseTranscript(path) + require.NoError(t, err) + require.NotNil(t, stats) + + assert.Equal(t, 5, stats.TotalLines) + assert.Equal(t, 3, stats.SkippedLines) + assert.Len(t, stats.Warnings, 3) + + // Each warning should contain line number and preview + for _, w := range stats.Warnings { + assert.Contains(t, w, "skipped (bad JSON)") + } +} + +func TestParseStats_OrphanedToolCalls_Good(t *testing.T) { + dir := t.TempDir() + // Two tool_use entries with no matching tool_result + path := writeJSONL(t, dir, "orphaned.jsonl", + toolUseEntry(ts(0), "Bash", "orphan-1", map[string]interface{}{ + "command": "ls", + }), + toolUseEntry(ts(1), "Read", "orphan-2", map[string]interface{}{ + "file_path": "/tmp/file.go", + }), + assistantTextEntry(ts(2), "Never got results"), + ) + + _, stats, err := ParseTranscript(path) + require.NoError(t, err) + require.NotNil(t, stats) + + assert.Equal(t, 2, stats.OrphanedToolCalls) + + // Warnings should mention orphaned tool IDs + var orphanWarnings int + for _, w := range stats.Warnings { + if strings.Contains(w, "orphaned tool call") { + orphanWarnings++ + } + } + assert.Equal(t, 2, orphanWarnings) +} + +func TestParseStats_TruncatedFinalLine_Good(t *testing.T) { + dir := t.TempDir() + validLine := userTextEntry(ts(0), "Hello") + truncatedLine := `{"type":"assi` + + // Write the truncated line as the last line of the file (a trailing newline is still written) + path := filepath.Join(dir, "truncfinal.jsonl") + require.NoError(t, os.WriteFile(path, []byte(validLine+"\n"+truncatedLine+"\n"), 0644)) + + _, stats, err := ParseTranscript(path) + require.NoError(t, err) + require.NotNil(t, stats) + + assert.Equal(t, 1, stats.SkippedLines) + + // Should detect truncated final line + var foundTruncated bool + for _, w := range stats.Warnings { + if strings.Contains(w, "truncated final line") { + foundTruncated = true + } + } + 
assert.True(t, foundTruncated, "should detect truncated final line") +} + +func TestParseStats_FileEndingMidJSON_Good(t *testing.T) { + dir := t.TempDir() + validLine := userTextEntry(ts(0), "Hello") + midJSON := `{"type":"assistant","timestamp":"2026-02-20T10:00:01Z","sessionId":"test","message":{"role":"assi` + + path := filepath.Join(dir, "midjson.jsonl") + require.NoError(t, os.WriteFile(path, []byte(validLine+"\n"+midJSON+"\n"), 0644)) + + sess, stats, err := ParseTranscript(path) + require.NoError(t, err) + require.NotNil(t, sess) + require.NotNil(t, stats) + + assert.Equal(t, 1, stats.SkippedLines) + + var foundTruncated bool + for _, w := range stats.Warnings { + if strings.Contains(w, "truncated final line") { + foundTruncated = true + } + } + assert.True(t, foundTruncated) +} + +func TestParseStats_CompleteFileNoTrailingNewline_Good(t *testing.T) { + dir := t.TempDir() + line := userTextEntry(ts(0), "Hello") + + // Write without trailing newline — should still parse fine + path := filepath.Join(dir, "nonewline.jsonl") + require.NoError(t, os.WriteFile(path, []byte(line), 0644)) + + sess, stats, err := ParseTranscript(path) + require.NoError(t, err) + require.NotNil(t, sess) + require.NotNil(t, stats) + + assert.Equal(t, 0, stats.SkippedLines) + assert.Equal(t, 0, stats.OrphanedToolCalls) + assert.Len(t, sess.Events, 1) + + // No truncation warning since the line parsed successfully + var foundTruncated bool + for _, w := range stats.Warnings { + if strings.Contains(w, "truncated final line") { + foundTruncated = true + } + } + assert.False(t, foundTruncated) +} + +func TestParseStats_WarningPreviewTruncated_Good(t *testing.T) { + dir := t.TempDir() + // A malformed line longer than 100 chars + longBadLine := `{` + strings.Repeat("x", 200) + path := writeJSONL(t, dir, "longbad.jsonl", + longBadLine, + userTextEntry(ts(0), "Valid"), + ) + + _, stats, err := ParseTranscript(path) + require.NoError(t, err) + + require.Len(t, stats.Warnings, 1) // 1 skipped 
line (last line is valid, no truncation) + // The preview in the warning should be at most ~100 chars of the bad line + assert.True(t, len(stats.Warnings[0]) < 200, + "warning preview should be truncated for long lines") + assert.Contains(t, stats.Warnings[0], "line 1:") +} diff --git a/search.go b/search.go index 71d4cb2..69dc9c3 100644 --- a/search.go +++ b/search.go @@ -25,7 +25,7 @@ func Search(projectsDir, query string) ([]SearchResult, error) { query = strings.ToLower(query) for _, path := range matches { - sess, err := ParseTranscript(path) + sess, _, err := ParseTranscript(path) if err != nil { continue }