go-ml/cmd/cmd_score.go

package cmd

import (
	"context"
	"fmt"
	"maps"
	"slices"
	"time"

	"forge.lthn.ai/core/cli/pkg/cli"
	"forge.lthn.ai/core/go-ml"
)

var (
	scoreInput  string
	scoreSuites string
	scoreOutput string
	scoreConcur int
)

var scoreCmd = &cli.Command{
	Use:   "score",
	Short: "Score responses with heuristic and LLM judges",
	Long:  "Reads a JSONL file of prompt/response pairs and scores them across configured suites.",
	RunE:  runScore,
}

func init() {
	scoreCmd.Flags().StringVar(&scoreInput, "input", "", "Input JSONL file with prompt/response pairs (required)")
	scoreCmd.Flags().StringVar(&scoreSuites, "suites", "all", "Comma-separated scoring suites (heuristic,semantic,content,exact,truthfulqa,donotanswer,toxigen)")
	scoreCmd.Flags().StringVar(&scoreOutput, "output", "", "Output JSON file for scores")
	scoreCmd.Flags().IntVar(&scoreConcur, "concurrency", 4, "Number of concurrent scoring workers")
	scoreCmd.MarkFlagRequired("input")
}

func runScore(cmd *cli.Command, args []string) error {
	responses, err := ml.ReadResponses(scoreInput)
	if err != nil {
		return fmt.Errorf("read input: %w", err)
	}

	var judge *ml.Judge
	if judgeURL != "" {
		backend := ml.NewHTTPBackend(judgeURL, judgeModel)
		judge = ml.NewJudge(backend)
	}

	engine := ml.NewEngine(judge, scoreConcur, scoreSuites)

	ctx := context.Background()
	perPrompt := engine.ScoreAll(ctx, responses)
	averages := ml.ComputeAverages(perPrompt)

	if scoreOutput != "" {
		output := &ml.ScorerOutput{
			Metadata: ml.Metadata{
				JudgeModel: judgeModel,
				JudgeURL:   judgeURL,
				ScoredAt:   time.Now(),
				Suites:     ml.SplitComma(scoreSuites),
			},
			ModelAverages: averages,
			PerPrompt:     perPrompt,
		}
		if err := ml.WriteScores(scoreOutput, output); err != nil {
			return fmt.Errorf("write output: %w", err)
		}
		fmt.Printf("Scores written to %s\n", scoreOutput)
	} else {
		for _, model := range slices.Sorted(maps.Keys(averages)) {
			avgs := averages[model]
			fmt.Printf("%s:\n", model)
			for _, field := range slices.Sorted(maps.Keys(avgs)) {
				fmt.Printf("  %-25s %.3f\n", field, avgs[field])
			}
		}
	}

	return nil
}