diff --git a/CHANGELOG.md b/CHANGELOG.md index 23b0de5..49bc481 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,23 +5,32 @@ All notable changes to this project will be documented in this file. The format is based on Keep a Changelog and this project adheres to Semantic Versioning. ## [Unreleased] + +## [0.2.1] - 2025-11-03 ### Added +- Normalization stats helpers: `AxisStats`, `NormStats`, `ComputeNormStats2D/3D/4D`. +- Builders that reuse stats: `Build2DWithStats`, `Build3DWithStats`, `Build4DWithStats`. +- pkg.go.dev examples: `ExampleBuild2DWithStats`, `ExampleBuild4DWithStats`. +- Tests for stats parity, min==max safety, and dynamic update with reused stats. +- Docs: API reference section “KDTree Normalization Stats (reuse across updates)”; updated multi-dimensional docs with WithStats snippet. + +### Changed +- Bumped version to `0.2.1`. + +### Previously added in Unreleased - README badges (pkg.go.dev, CI, Go Report Card, govulncheck) and KDTree performance/concurrency notes. - Examples directory with runnable programs: 1D ping, 2D ping+hop, 3D ping+hop+geo, 4D ping+hop+geo+score. - CI workflow (Go 1.22/1.23): tidy check, build, vet, test -race, build examples, govulncheck, golangci-lint. - Lint configuration (.golangci.yml) with a pragmatic ruleset. - Contributor docs: CONTRIBUTING.md, CODE_OF_CONDUCT.md, SECURITY.md. - pkg.go.dev example functions for KDTree usage and helpers. -- Fuzz tests and benchmarks for KDTree (Nearest/KNearest/Radius and metrics). - -### Changed -- Documented KDTree complexity and tie-ordering in code comments. -- Docs: API examples synced to Version 0.2.0; added references to helpers and examples. +- Fuzz tests and benchmarks for KDTree (Nearest/KNearest/Radius and metrics). ## [0.2.0] - 2025-10-?? ### Added - KDTree public API with generic payloads and helper builders (Build2D/3D/4D). - Docs pages for DHT examples and multi-dimensional KDTree usage. 
-[Unreleased]: https://github.com/Snider/Poindexter/compare/v0.2.0...HEAD +[Unreleased]: https://github.com/Snider/Poindexter/compare/v0.2.1...HEAD +[0.2.1]: https://github.com/Snider/Poindexter/releases/tag/v0.2.1 [0.2.0]: https://github.com/Snider/Poindexter/releases/tag/v0.2.0 diff --git a/docs/api.md b/docs/api.md index 1de135e..4435649 100644 --- a/docs/api.md +++ b/docs/api.md @@ -13,13 +13,13 @@ func Version() string Returns the current version of the library. **Returns:** -- `string`: The version string (e.g., "0.2.0") +- `string`: The version string (e.g., "0.2.1") **Example:** ```go version := poindexter.Version() -fmt.Println(version) // Output: 0.2.0 +fmt.Println(version) // Output: 0.2.1 ``` --- @@ -410,3 +410,85 @@ Construct an empty KDTree with the given dimension, then populate later via `Ins - Concurrency: KDTree is not safe for concurrent mutation. Wrap with a mutex or share immutable snapshots for read-mostly workloads. See runnable examples in the repository `examples/` and the docs pages for 1D DHT and multi-dimensional KDTree usage. + + +## KDTree Normalization Stats (reuse across updates) + +To keep normalization consistent across dynamic updates, compute per‑axis min/max once and reuse it to build points later. This avoids drift when the candidate set changes. + +### Types + +```go +// AxisStats holds the min/max observed for a single axis. +type AxisStats struct { + Min float64 + Max float64 +} + +// NormStats holds per‑axis normalisation stats; for D dims, Stats has length D. 
+type NormStats struct { + Stats []AxisStats +} +``` + +### Compute normalization stats + +```go +func ComputeNormStats2D[T any](items []T, f1, f2 func(T) float64) NormStats +func ComputeNormStats3D[T any](items []T, f1, f2, f3 func(T) float64) NormStats +func ComputeNormStats4D[T any](items []T, f1, f2, f3, f4 func(T) float64) NormStats +``` + +### Build with precomputed stats + +```go +func Build2DWithStats[T any]( + items []T, + id func(T) string, + f1, f2 func(T) float64, + weights [2]float64, + invert [2]bool, + stats NormStats, +) ([]KDPoint[T], error) + +func Build3DWithStats[T any]( + items []T, + id func(T) string, + f1, f2, f3 func(T) float64, + weights [3]float64, + invert [3]bool, + stats NormStats, +) ([]KDPoint[T], error) + +func Build4DWithStats[T any]( + items []T, + id func(T) string, + f1, f2, f3, f4 func(T) float64, + weights [4]float64, + invert [4]bool, + stats NormStats, +) ([]KDPoint[T], error) +``` + +#### Example (2D) +```go +// Compute stats once over your baseline set +stats := poindexter.ComputeNormStats2D(peers, + func(p Peer) float64 { return p.PingMS }, + func(p Peer) float64 { return p.Hops }, +) + +// Build points using those stats (now or later) +pts, _ := poindexter.Build2DWithStats( + peers, + func(p Peer) string { return p.ID }, + func(p Peer) float64 { return p.PingMS }, + func(p Peer) float64 { return p.Hops }, + [2]float64{1,1}, [2]bool{false,false}, stats, +) +``` + +Notes: +- If `min==max` for an axis, normalized value is `0` for that axis. +- `invert[i]` flips the normalized axis as `1 - n` before applying `weights[i]`. +- These helpers mirror `Build2D/3D/4D`, but use your provided `NormStats` instead of recomputing from the items slice. diff --git a/docs/kdtree-multidimensional.md b/docs/kdtree-multidimensional.md index b16f077..e4cc6a5 100644 --- a/docs/kdtree-multidimensional.md +++ b/docs/kdtree-multidimensional.md @@ -192,7 +192,26 @@ func main() { ## Dynamic updates -Your routing table changes constantly. 
Insert/remove peers. For consistent normalization, rebuild points when the candidate set changes (or cache and reuse your min/max stats). +Your routing table changes constantly. Insert/remove peers. For consistent normalization, compute and reuse your min/max stats (preferred) or rebuild points when the candidate set changes. + +Tip: Use the WithStats helpers to reuse normalization across updates: + +```go +// Compute once over your baseline +stats := poindexter.ComputeNormStats2D(peers, + func(p Peer) float64 { return p.PingMS }, + func(p Peer) float64 { return p.Hops }, +) + +// Build now or later using the same stats +ts, _ := poindexter.Build2DWithStats( + peers, + func(p Peer) string { return p.ID }, + func(p Peer) float64 { return p.PingMS }, + func(p Peer) float64 { return p.Hops }, + [2]float64{1,1}, [2]bool{false,false}, stats, +) +``` ```go package main diff --git a/examples_test.go b/examples_test.go index 5ef41b7..259669b 100644 --- a/examples_test.go +++ b/examples_test.go @@ -119,3 +119,47 @@ func ExampleBuild4D() { fmt.Println(tr.Dim()) // Output: 4 } + +func ExampleBuild2DWithStats() { + type rec struct{ ping, hops float64 } + items := []rec{{20, 3}, {30, 2}, {15, 4}} + weights := [2]float64{1.0, 1.0} + invert := [2]bool{false, false} + stats := poindexter.ComputeNormStats2D(items, + func(r rec) float64 { return r.ping }, + func(r rec) float64 { return r.hops }, + ) + pts, _ := poindexter.Build2DWithStats(items, + func(r rec) string { return "" }, + func(r rec) float64 { return r.ping }, + func(r rec) float64 { return r.hops }, + weights, invert, stats, + ) + tr, _ := poindexter.NewKDTree(pts) + fmt.Printf("dim=%d len=%d", tr.Dim(), tr.Len()) + // Output: dim=2 len=3 +} + +func ExampleBuild4DWithStats() { + type rec struct{ a, b, c, d float64 } + items := []rec{{0, 0, 0, 0}, {1, 1, 1, 1}} + weights := [4]float64{1, 1, 1, 1} + invert := [4]bool{false, false, false, false} + stats := poindexter.ComputeNormStats4D(items, + func(r rec) float64 { return 
r.a }, + func(r rec) float64 { return r.b }, + func(r rec) float64 { return r.c }, + func(r rec) float64 { return r.d }, + ) + pts, _ := poindexter.Build4DWithStats(items, + func(r rec) string { return "" }, + func(r rec) float64 { return r.a }, + func(r rec) float64 { return r.b }, + func(r rec) float64 { return r.c }, + func(r rec) float64 { return r.d }, + weights, invert, stats, + ) + tr, _ := poindexter.NewKDTree(pts) + fmt.Println(tr.Dim()) + // Output: 4 +} diff --git a/kdtree_helpers.go b/kdtree_helpers.go index c359184..a70f0b6 100644 --- a/kdtree_helpers.go +++ b/kdtree_helpers.go @@ -4,6 +4,18 @@ package poindexter // and per-axis weights. These are convenience utilities to make it easy to map domain // records into KD space for 2D/3D/4D use-cases. +// AxisStats holds the min/max observed for a single axis. +type AxisStats struct { + Min float64 + Max float64 +} + +// NormStats holds per-axis normalisation statistics. +// For D dimensions, Stats has length D. +type NormStats struct { + Stats []AxisStats +} + // minMax returns (min,max) of a slice. func minMax(xs []float64) (float64, float64) { if len(xs) == 0 { @@ -29,6 +41,54 @@ func scale01(v, min, max float64) float64 { return (v - min) / (max - min) } +// ComputeNormStats2D computes per-axis min/max for two features. +func ComputeNormStats2D[T any](items []T, f1, f2 func(T) float64) NormStats { + vals1 := make([]float64, len(items)) + vals2 := make([]float64, len(items)) + for i, it := range items { + vals1[i] = f1(it) + vals2[i] = f2(it) + } + mn1, mx1 := minMax(vals1) + mn2, mx2 := minMax(vals2) + return NormStats{Stats: []AxisStats{{mn1, mx1}, {mn2, mx2}}} +} + +// ComputeNormStats3D computes per-axis min/max for three features. 
+func ComputeNormStats3D[T any](items []T, f1, f2, f3 func(T) float64) NormStats { + vals1 := make([]float64, len(items)) + vals2 := make([]float64, len(items)) + vals3 := make([]float64, len(items)) + for i, it := range items { + vals1[i] = f1(it) + vals2[i] = f2(it) + vals3[i] = f3(it) + } + mn1, mx1 := minMax(vals1) + mn2, mx2 := minMax(vals2) + mn3, mx3 := minMax(vals3) + return NormStats{Stats: []AxisStats{{mn1, mx1}, {mn2, mx2}, {mn3, mx3}}} +} + +// ComputeNormStats4D computes per-axis min/max for four features. +func ComputeNormStats4D[T any](items []T, f1, f2, f3, f4 func(T) float64) NormStats { + vals1 := make([]float64, len(items)) + vals2 := make([]float64, len(items)) + vals3 := make([]float64, len(items)) + vals4 := make([]float64, len(items)) + for i, it := range items { + vals1[i] = f1(it) + vals2[i] = f2(it) + vals3[i] = f3(it) + vals4[i] = f4(it) + } + mn1, mx1 := minMax(vals1) + mn2, mx2 := minMax(vals2) + mn3, mx3 := minMax(vals3) + mn4, mx4 := minMax(vals4) + return NormStats{Stats: []AxisStats{{mn1, mx1}, {mn2, mx2}, {mn3, mx3}, {mn4, mx4}}} +} + // Build2D constructs normalized-and-weighted KD points from items using two feature extractors. // - id: function to provide a stable string ID (can return "" if you don't need DeleteByID) // - f1,f2: feature extractors (raw values) @@ -69,6 +129,33 @@ func Build2D[T any](items []T, id func(T) string, f1, f2 func(T) float64, weight return pts, nil } +// Build2DWithStats builds points using provided normalisation stats. 
+func Build2DWithStats[T any](items []T, id func(T) string, f1, f2 func(T) float64, weights [2]float64, invert [2]bool, stats NormStats) ([]KDPoint[T], error) { + if len(items) == 0 { + return nil, nil + } + if len(stats.Stats) != 2 { + return nil, nil + } + pts := make([]KDPoint[T], len(items)) + for i, it := range items { + n1 := scale01(f1(it), stats.Stats[0].Min, stats.Stats[0].Max) + n2 := scale01(f2(it), stats.Stats[1].Min, stats.Stats[1].Max) + if invert[0] { + n1 = 1 - n1 + } + if invert[1] { + n2 = 1 - n2 + } + pts[i] = KDPoint[T]{ + ID: id(it), + Value: it, + Coords: []float64{weights[0] * n1, weights[1] * n2}, + } + } + return pts, nil +} + // Build3D constructs normalized-and-weighted KD points using three feature extractors. func Build3D[T any](items []T, id func(T) string, f1, f2, f3 func(T) float64, weights [3]float64, invert [3]bool) ([]KDPoint[T], error) { if len(items) == 0 { @@ -113,6 +200,37 @@ func Build3D[T any](items []T, id func(T) string, f1, f2, f3 func(T) float64, we return pts, nil } +// Build3DWithStats builds points using provided normalisation stats. +func Build3DWithStats[T any](items []T, id func(T) string, f1, f2, f3 func(T) float64, weights [3]float64, invert [3]bool, stats NormStats) ([]KDPoint[T], error) { + if len(items) == 0 { + return nil, nil + } + if len(stats.Stats) != 3 { + return nil, nil + } + pts := make([]KDPoint[T], len(items)) + for i, it := range items { + n1 := scale01(f1(it), stats.Stats[0].Min, stats.Stats[0].Max) + n2 := scale01(f2(it), stats.Stats[1].Min, stats.Stats[1].Max) + n3 := scale01(f3(it), stats.Stats[2].Min, stats.Stats[2].Max) + if invert[0] { + n1 = 1 - n1 + } + if invert[1] { + n2 = 1 - n2 + } + if invert[2] { + n3 = 1 - n3 + } + pts[i] = KDPoint[T]{ + ID: id(it), + Value: it, + Coords: []float64{weights[0] * n1, weights[1] * n2, weights[2] * n3}, + } + } + return pts, nil +} + // Build4D constructs normalized-and-weighted KD points using four feature extractors. 
func Build4D[T any](items []T, id func(T) string, f1, f2, f3, f4 func(T) float64, weights [4]float64, invert [4]bool) ([]KDPoint[T], error) { if len(items) == 0 { @@ -164,3 +282,38 @@ func Build4D[T any](items []T, id func(T) string, f1, f2, f3, f4 func(T) float64 } return pts, nil } + +// Build4DWithStats builds points using provided normalisation stats. +func Build4DWithStats[T any](items []T, id func(T) string, f1, f2, f3, f4 func(T) float64, weights [4]float64, invert [4]bool, stats NormStats) ([]KDPoint[T], error) { + if len(items) == 0 { + return nil, nil + } + if len(stats.Stats) != 4 { + return nil, nil + } + pts := make([]KDPoint[T], len(items)) + for i, it := range items { + n1 := scale01(f1(it), stats.Stats[0].Min, stats.Stats[0].Max) + n2 := scale01(f2(it), stats.Stats[1].Min, stats.Stats[1].Max) + n3 := scale01(f3(it), stats.Stats[2].Min, stats.Stats[2].Max) + n4 := scale01(f4(it), stats.Stats[3].Min, stats.Stats[3].Max) + if invert[0] { + n1 = 1 - n1 + } + if invert[1] { + n2 = 1 - n2 + } + if invert[2] { + n3 = 1 - n3 + } + if invert[3] { + n4 = 1 - n4 + } + pts[i] = KDPoint[T]{ + ID: id(it), + Value: it, + Coords: []float64{weights[0] * n1, weights[1] * n2, weights[2] * n3, weights[3] * n4}, + } + } + return pts, nil +} diff --git a/kdtree_helpers_test.go b/kdtree_helpers_test.go index 6dd9b8b..48ee724 100644 --- a/kdtree_helpers_test.go +++ b/kdtree_helpers_test.go @@ -110,3 +110,116 @@ func TestBuild4D_EndToEnd_Example(t *testing.T) { t.Fatalf("expected best B, got %s", best.ID) } } + +func TestComputeNormStatsAndWithStats_Parity2D(t *testing.T) { + type rec struct{ a, b float64 } + items := []rec{{0, 10}, {5, 20}, {10, 30}} + weights := [2]float64{1, 2} + invert := [2]bool{false, true} + // Build using automatic stats + autoPts, err := Build2D(items, + func(r rec) string { return "" }, + func(r rec) float64 { return r.a }, + func(r rec) float64 { return r.b }, + weights, invert, + ) + if err != nil { + t.Fatalf("auto build err: %v", err) + } + // 
Compute stats and build with stats + stats := ComputeNormStats2D(items, + func(r rec) float64 { return r.a }, + func(r rec) float64 { return r.b }, + ) + withPts, err := Build2DWithStats(items, + func(r rec) string { return "" }, + func(r rec) float64 { return r.a }, + func(r rec) float64 { return r.b }, + weights, invert, stats, + ) + if err != nil { + t.Fatalf("with-stats build err: %v", err) + } + if len(withPts) != len(autoPts) { + t.Fatalf("len mismatch") + } + for i := range withPts { + if len(withPts[i].Coords) != 2 { + t.Fatalf("dim mismatch") + } + if withPts[i].Coords[0] != autoPts[i].Coords[0] || withPts[i].Coords[1] != autoPts[i].Coords[1] { + t.Fatalf("coords mismatch at %d: %v vs %v", i, withPts[i].Coords, autoPts[i].Coords) + } + } +} + +func TestBuild3DWithStats_MinEqualsMax_Safe(t *testing.T) { + type rec struct{ x, y, z float64 } + items := []rec{{1, 2, 3}, {1, 5, 3}, {1, 9, 3}} + weights := [3]float64{1, 1, 1} + invert := [3]bool{false, false, false} + // x and z min==max across items for x=1, z=3 + stats := NormStats{Stats: []AxisStats{{Min: 1, Max: 1}, {Min: 2, Max: 9}, {Min: 3, Max: 3}}} + pts, err := Build3DWithStats(items, + func(r rec) string { return "" }, + func(r rec) float64 { return r.x }, + func(r rec) float64 { return r.y }, + func(r rec) float64 { return r.z }, + weights, invert, stats, + ) + if err != nil { + t.Fatalf("err: %v", err) + } + for _, p := range pts { + if p.Coords[0] != 0 || p.Coords[2] != 0 { + t.Fatalf("expected zero for min==max axes, got %v", p.Coords) + } + } +} + +func TestBuild4DWithStats_DynamicUpdateExample(t *testing.T) { + type Peer struct { + ID string + Ping, Hops, Geo, Score float64 + } + base := []Peer{{"A", 20, 3, 1000, 0.8}, {"B", 30, 2, 800, 0.9}} + weights := [4]float64{1, 1, 0.2, 1.2} + invert := [4]bool{false, false, false, true} + stats := ComputeNormStats4D(base, + func(p Peer) float64 { return p.Ping }, + func(p Peer) float64 { return p.Hops }, + func(p Peer) float64 { return p.Geo }, + func(p 
// Version reports the library's current version string.
func Version() string {
	const current = "0.2.1"
	return current
}