diff --git a/docs/getting-started.md b/docs/getting-started.md index 2b82a86..0303814 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -123,4 +123,5 @@ func main() { - Check out the [API Reference](api.md) for detailed documentation - Try the example: [Find the best (lowest‑ping) DHT peer](dht-best-ping.md) +- Explore multi-dimensional KDTree over ping/hops/geo/score: [Multi-Dimensional KDTree (DHT)](kdtree-multidimensional.md) - Read about the [License](license.md) diff --git a/docs/index.md b/docs/index.md index 4743455..7150ee9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -52,3 +52,4 @@ Contributions are welcome! Please feel free to submit a Pull Request. ## Examples - Find the best (lowest‑ping) DHT peer using KDTree: [Best Ping Peer (DHT)](dht-best-ping.md) +- Multi-dimensional neighbor search over ping, hops, geo, and score: [Multi-Dimensional KDTree (DHT)](kdtree-multidimensional.md) diff --git a/docs/kdtree-multidimensional.md b/docs/kdtree-multidimensional.md new file mode 100644 index 0000000..36634b1 --- /dev/null +++ b/docs/kdtree-multidimensional.md @@ -0,0 +1,255 @@ +# KDTree: Multi‑Dimensional Search (DHT peers) + +This example extends the single‑dimension "best ping" demo to a realistic multi‑dimensional selection: + +- ping_ms (lower is better) +- hop_count (lower is better) +- geo_distance_km (lower is better) +- score (higher is better — e.g., capacity/reputation) + +We will: +- Build 4‑D points over these features +- Run `Nearest`, `KNearest`, and `Radius` queries +- Show subsets: ping+hop (2‑D) and ping+hop+geo (3‑D) +- Demonstrate weighting/normalization to balance disparate units + +> Tip: KDTree distances are geometric. Mixing units (ms, hops, km, arbitrary score) requires scaling so that each axis contributes proportionally to your decision policy. 
+ +## Dataset + +```go +package main + +import ( + "fmt" + poindexter "github.com/Snider/Poindexter" +) + +type Peer struct { + ID string + PingMS float64 // milliseconds + Hops float64 // hop count + GeoKM float64 // crow‑flight distance in kilometers + Score float64 // [0..1] trust/rep/capacity score (higher is better) +} + +var peers = []Peer{ + {ID: "A", PingMS: 22, Hops: 3, GeoKM: 1200, Score: 0.86}, + {ID: "B", PingMS: 34, Hops: 2, GeoKM: 800, Score: 0.91}, + {ID: "C", PingMS: 15, Hops: 4, GeoKM: 4500, Score: 0.70}, + {ID: "D", PingMS: 55, Hops: 1, GeoKM: 300, Score: 0.95}, + {ID: "E", PingMS: 18, Hops: 2, GeoKM: 2200, Score: 0.80}, +} +``` + +## Normalization and weights + +We scale raw features to comparable magnitudes and flip `Score` so lower is better. For demo simplicity we will: +- Min‑max normalize each axis to [0,1] over the current candidate set +- Convert `Score` to a cost: `score_cost = 1 - score` +- Apply weights to emphasize certain axes + +Helper functions: + +```go +// minMax returns (min, max) of a slice. +func minMax(xs []float64) (float64, float64) { + if len(xs) == 0 { return 0, 0 } + mn, mx := xs[0], xs[0] + for _, v := range xs[1:] { + if v < mn { mn = v } + if v > mx { mx = v } + } + return mn, mx +} + +// scale01 maps v from [min,max] to [0,1]. If min==max, returns 0. 
+func scale01(v, min, max float64) float64 { + if max == min { return 0 } + return (v - min) / (max - min) +} +``` + +Build 4‑D points: + +```go +// Weights to balance axes (tune to taste) +var wPing, wHop, wGeo, wScore = 1.0, 0.7, 0.2, 1.2 + +func build4D(peers []Peer) ([]poindexter.KDPoint[Peer], error) { + pings := make([]float64, len(peers)) + hops := make([]float64, len(peers)) + geos := make([]float64, len(peers)) + scores:= make([]float64, len(peers)) + for i, p := range peers { + pings[i], hops[i], geos[i], scores[i] = p.PingMS, p.Hops, p.GeoKM, p.Score + } + pMin, pMax := minMax(pings) + hMin, hMax := minMax(hops) + gMin, gMax := minMax(geos) + sMin, sMax := minMax(scores) + + pts := make([]poindexter.KDPoint[Peer], len(peers)) + for i, p := range peers { + pingN := scale01(p.PingMS, pMin, pMax) + hopN := scale01(p.Hops, hMin, hMax) + geoN := scale01(p.GeoKM, gMin, gMax) + scoreC := 1 - scale01(p.Score, sMin, sMax) // lower is better + + pts[i] = poindexter.KDPoint[Peer]{ + ID: p.ID, + Value: p, + Coords: []float64{ + wPing*pingN, + wHop*hopN, + wGeo*geoN, + wScore*scoreC, + }, + } + } + return pts, nil +} +``` + +## 4‑D KDTree: Nearest, k‑NN, Radius + +```go +func main() { + // Build 4‑D KDTree using Euclidean (L2) + pts, _ := build4D(peers) + tree, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{})) + + // Query target preferences (you may construct a query in normalized/weighted space) + // Example: seek very low ping, low hops, moderate geo, high score (low score_cost) + query := []float64{wPing*0.0, wHop*0.2, wGeo*0.3, wScore*0.0} + + // 1‑NN + best, dist, ok := tree.Nearest(query) + if ok { + fmt.Printf("Best peer: %s (dist=%.4f)\n", best.ID, dist) + } + + // k‑NN (top 3) + neigh, dists := tree.KNearest(query, 3) + for i := range neigh { + fmt.Printf("%d) %s dist=%.4f\n", i+1, neigh[i].ID, dists[i]) + } + + // Radius query + within, wd := tree.Radius(query, 0.35) + fmt.Printf("Within radius 0.35: ") + for i := range 
within { + fmt.Printf("%s(%.3f) ", within[i].ID, wd[i]) + } + fmt.Println() +} +``` + +## 2‑D: Ping + Hop + +Sometimes you want a strict trade‑off between just latency and path length. Build 2‑D points (reuse normalization): + +```go +var wPing2, wHop2 = 1.0, 1.0 + +func build2D_pingHop(peers []Peer) []poindexter.KDPoint[Peer] { + pings := make([]float64, len(peers)) + hops := make([]float64, len(peers)) + for i, p := range peers { pings[i], hops[i] = p.PingMS, p.Hops } + pMin, pMax := minMax(pings) + hMin, hMax := minMax(hops) + + pts := make([]poindexter.KDPoint[Peer], len(peers)) + for i, p := range peers { + pingN := scale01(p.PingMS, pMin, pMax) + hopN := scale01(p.Hops, hMin, hMax) + pts[i] = poindexter.KDPoint[Peer]{ + ID: p.ID, + Value: p, + Coords: []float64{ wPing2*pingN, wHop2*hopN }, + } + } + return pts +} + +func demo2D() { + pts := build2D_pingHop(peers) + tree, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.ManhattanDistance{})) // L1 favors axis‑aligned tradeoffs + // Prefer very low ping, modest hops + query := []float64{wPing2*0.0, wHop2*0.3} + best, _, _ := tree.Nearest(query) + fmt.Println("2D best (ping+hop):", best.ID) +} +``` + +## 3‑D: Ping + Hop + Geo + +Add geography to discourage far peers when latency is similar: + +```go +var wPing3, wHop3, wGeo3 = 1.0, 0.7, 0.3 + +func build3D_pingHopGeo(peers []Peer) []poindexter.KDPoint[Peer] { + pings := make([]float64, len(peers)) + hops := make([]float64, len(peers)) + geos := make([]float64, len(peers)) + for i, p := range peers { pings[i], hops[i], geos[i] = p.PingMS, p.Hops, p.GeoKM } + pMin, pMax := minMax(pings) + hMin, hMax := minMax(hops) + gMin, gMax := minMax(geos) + + pts := make([]poindexter.KDPoint[Peer], len(peers)) + for i, p := range peers { + pingN := scale01(p.PingMS, pMin, pMax) + hopN := scale01(p.Hops, hMin, hMax) + geoN := scale01(p.GeoKM, gMin, gMax) + pts[i] = poindexter.KDPoint[Peer]{ + ID: p.ID, + Value: p, + Coords: []float64{ wPing3*pingN, wHop3*hopN, 
wGeo3*geoN }, + } + } + return pts +} + +func demo3D() { + pts := build3D_pingHopGeo(peers) + tree, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{})) + // Prefer low ping/hop, modest geo + query := []float64{wPing3*0.0, wHop3*0.2, wGeo3*0.4} + top, _, _ := tree.Nearest(query) + fmt.Println("3D best (ping+hop+geo):", top.ID) +} +``` + +## Dynamic updates + +Your routing table changes constantly. Insert/remove peers without rebuilding: + +```go +func updatesExample() { + pts := build2D_pingHop(peers) + tree, _ := poindexter.NewKDTree(pts) + + // Insert a new peer + newPeer := Peer{ID: "Z", PingMS: 12, Hops: 2, GeoKM: 900, Score: 0.88} + // Build a 2D point for the new peer. NOTE: build2D_pingHop normalizes over the slice it is given, so a single-peer slice has min == max on every axis and maps to (0, 0); in a real system, retain the tree's original mins/maxes and normalize new peers with those. + ptsZ := build2D_pingHop([]Peer{newPeer}) + _ = tree.Insert(ptsZ[0]) + + // Delete by ID when peer goes offline + _ = tree.DeleteByID("Z") +} +``` + +## Choosing a metric + +- Euclidean (L2): smooth trade‑offs across axes; good default for blended preferences +- Manhattan (L1): emphasizes per‑axis absolute differences; useful when each unit of ping/hop matters equally +- Chebyshev (L∞): min‑max style; dominated by the worst axis (e.g., reject any peer with too many hops regardless of ping) + +## Notes on production use + +- Keep and reuse normalization parameters (min/max or mean/std) rather than recomputing per query to avoid drift. +- Consider capping outliers (e.g., clamp geo distances > 5000 km). +- For large N (≥ 1e5) and low dims (≤ 8), consider swapping the internal engine to `gonum.org/v1/gonum/spatial/kdtree` behind the same API for faster queries. diff --git a/kdtree_helpers.go b/kdtree_helpers.go new file mode 100644 index 0000000..c359184 --- /dev/null +++ b/kdtree_helpers.go @@ -0,0 +1,166 @@ +package poindexter + +// Helper builders for KDTree points with min-max normalization, optional inversion per-axis, +// and per-axis weights. 
These are convenience utilities to make it easy to map domain +// records into KD space for 2D/3D/4D use-cases. + +// minMax returns (min,max) of a slice. +func minMax(xs []float64) (float64, float64) { + if len(xs) == 0 { + return 0, 0 + } + mn, mx := xs[0], xs[0] + for _, v := range xs[1:] { + if v < mn { + mn = v + } + if v > mx { + mx = v + } + } + return mn, mx +} + +// scale01 maps v from [min,max] to [0,1]. If min==max, returns 0. +func scale01(v, min, max float64) float64 { + if max == min { + return 0 + } + return (v - min) / (max - min) +} + +// Build2D constructs normalized-and-weighted KD points from items using two feature extractors. +// - id: function to provide a stable string ID (can return "" if you don't need DeleteByID) +// - f1,f2: feature extractors (raw values) +// - weights: per-axis weights applied after normalization +// - invert: per-axis flags; if true, the axis is inverted (1-norm) so that higher raw values become lower cost +func Build2D[T any](items []T, id func(T) string, f1, f2 func(T) float64, weights [2]float64, invert [2]bool) ([]KDPoint[T], error) { + if len(items) == 0 { + return nil, nil + } + vals1 := make([]float64, len(items)) + vals2 := make([]float64, len(items)) + for i, it := range items { + vals1[i] = f1(it) + vals2[i] = f2(it) + } + mn1, mx1 := minMax(vals1) + mn2, mx2 := minMax(vals2) + + pts := make([]KDPoint[T], len(items)) + for i, it := range items { + n1 := scale01(vals1[i], mn1, mx1) + n2 := scale01(vals2[i], mn2, mx2) + if invert[0] { + n1 = 1 - n1 + } + if invert[1] { + n2 = 1 - n2 + } + pts[i] = KDPoint[T]{ + ID: id(it), + Value: it, + Coords: []float64{ + weights[0] * n1, + weights[1] * n2, + }, + } + } + return pts, nil +} + +// Build3D constructs normalized-and-weighted KD points using three feature extractors. 
+func Build3D[T any](items []T, id func(T) string, f1, f2, f3 func(T) float64, weights [3]float64, invert [3]bool) ([]KDPoint[T], error) { + if len(items) == 0 { + return nil, nil + } + vals1 := make([]float64, len(items)) + vals2 := make([]float64, len(items)) + vals3 := make([]float64, len(items)) + for i, it := range items { + vals1[i] = f1(it) + vals2[i] = f2(it) + vals3[i] = f3(it) + } + mn1, mx1 := minMax(vals1) + mn2, mx2 := minMax(vals2) + mn3, mx3 := minMax(vals3) + + pts := make([]KDPoint[T], len(items)) + for i, it := range items { + n1 := scale01(vals1[i], mn1, mx1) + n2 := scale01(vals2[i], mn2, mx2) + n3 := scale01(vals3[i], mn3, mx3) + if invert[0] { + n1 = 1 - n1 + } + if invert[1] { + n2 = 1 - n2 + } + if invert[2] { + n3 = 1 - n3 + } + pts[i] = KDPoint[T]{ + ID: id(it), + Value: it, + Coords: []float64{ + weights[0] * n1, + weights[1] * n2, + weights[2] * n3, + }, + } + } + return pts, nil +} + +// Build4D constructs normalized-and-weighted KD points using four feature extractors. 
+func Build4D[T any](items []T, id func(T) string, f1, f2, f3, f4 func(T) float64, weights [4]float64, invert [4]bool) ([]KDPoint[T], error) { + if len(items) == 0 { + return nil, nil + } + vals1 := make([]float64, len(items)) + vals2 := make([]float64, len(items)) + vals3 := make([]float64, len(items)) + vals4 := make([]float64, len(items)) + for i, it := range items { + vals1[i] = f1(it) + vals2[i] = f2(it) + vals3[i] = f3(it) + vals4[i] = f4(it) + } + mn1, mx1 := minMax(vals1) + mn2, mx2 := minMax(vals2) + mn3, mx3 := minMax(vals3) + mn4, mx4 := minMax(vals4) + + pts := make([]KDPoint[T], len(items)) + for i, it := range items { + n1 := scale01(vals1[i], mn1, mx1) + n2 := scale01(vals2[i], mn2, mx2) + n3 := scale01(vals3[i], mn3, mx3) + n4 := scale01(vals4[i], mn4, mx4) + if invert[0] { + n1 = 1 - n1 + } + if invert[1] { + n2 = 1 - n2 + } + if invert[2] { + n3 = 1 - n3 + } + if invert[3] { + n4 = 1 - n4 + } + pts[i] = KDPoint[T]{ + ID: id(it), + Value: it, + Coords: []float64{ + weights[0] * n1, + weights[1] * n2, + weights[2] * n3, + weights[3] * n4, + }, + } + } + return pts, nil +} diff --git a/kdtree_helpers_test.go b/kdtree_helpers_test.go new file mode 100644 index 0000000..6dd9b8b --- /dev/null +++ b/kdtree_helpers_test.go @@ -0,0 +1,112 @@ +package poindexter + +import ( + "fmt" + "testing" +) + +func TestBuild2D_NormalizationAndInversion(t *testing.T) { + type rec struct{ a, b float64 } + items := []rec{{a: 0, b: 100}, {a: 10, b: 300}} + // f1 over [0,10], f2 over [100,300] + pts, err := Build2D(items, + func(r rec) string { return "" }, + func(r rec) float64 { return r.a }, + func(r rec) float64 { return r.b }, + [2]float64{2.0, 0.5}, + [2]bool{true, false}, // invert first axis, not second + ) + if err != nil { + t.Fatalf("Build2D err: %v", err) + } + if len(pts) != 2 { + t.Fatalf("expected 2 points, got %d", len(pts)) + } + // item0: a=0 -> n1=0 -> invert -> 1 -> *2 = 2; b=100 -> n2=0 -> *0.5 = 0 + if got := fmt.Sprintf("%.1f,%.1f", pts[0].Coords[0], 
pts[0].Coords[1]); got != "2.0,0.0" { + t.Fatalf("coords[0] = %s, want 2.0,0.0", got) + } + // item1: a=10 -> n1=1 -> invert -> 0 -> *2 = 0; b=300 -> n2=1 -> *0.5=0.5 + if got := fmt.Sprintf("%.1f,%.1f", pts[1].Coords[0], pts[1].Coords[1]); got != "0.0,0.5" { + t.Fatalf("coords[1] = %s, want 0.0,0.5", got) + } +} + +func TestBuild3D_AllEqualSafe(t *testing.T) { + type rec struct{ x, y, z float64 } + items := []rec{{1, 1, 1}, {1, 1, 1}} + pts, err := Build3D(items, + func(r rec) string { return "id" }, + func(r rec) float64 { return r.x }, + func(r rec) float64 { return r.y }, + func(r rec) float64 { return r.z }, + [3]float64{1, 1, 1}, + [3]bool{false, false, false}, + ) + if err != nil { + t.Fatalf("Build3D err: %v", err) + } + if len(pts) != 2 { + t.Fatalf("len = %d", len(pts)) + } + for i := range pts { + if len(pts[i].Coords) != 3 { + t.Fatalf("dim = %d", len(pts[i].Coords)) + } + for _, c := range pts[i].Coords { + if c != 0 { + t.Fatalf("expected 0 when min==max, got %v", c) + } + } + } +} + +// Example-style end-to-end sanity on 4D using the documented Peer data +func TestBuild4D_EndToEnd_Example(t *testing.T) { + type Peer struct { + ID string + PingMS float64 + Hops float64 + GeoKM float64 + Score float64 + } + peers := []Peer{ + {ID: "A", PingMS: 22, Hops: 3, GeoKM: 1200, Score: 0.86}, + {ID: "B", PingMS: 34, Hops: 2, GeoKM: 800, Score: 0.91}, + {ID: "C", PingMS: 15, Hops: 4, GeoKM: 4500, Score: 0.70}, + {ID: "D", PingMS: 55, Hops: 1, GeoKM: 300, Score: 0.95}, + {ID: "E", PingMS: 18, Hops: 2, GeoKM: 2200, Score: 0.80}, + } + weights := [4]float64{1.0, 0.7, 0.2, 1.2} + invert := [4]bool{false, false, false, true} // flip score so higher score -> lower cost + pts, err := Build4D(peers, + func(p Peer) string { return p.ID }, + func(p Peer) float64 { return p.PingMS }, + func(p Peer) float64 { return p.Hops }, + func(p Peer) float64 { return p.GeoKM }, + func(p Peer) float64 { return p.Score }, + weights, invert, + ) + if err != nil { + t.Fatalf("Build4D err: 
%v", err) + } + if len(pts) != len(peers) { + t.Fatalf("len pts=%d", len(pts)) + } + // Build KDTree and query near origin in normalized/weighted space (prefer minima on all axes) + tree, err := NewKDTree(pts, WithMetric(EuclideanDistance{})) + if err != nil { + t.Fatalf("NewKDTree err: %v", err) + } + if tree.Dim() != 4 { + t.Fatalf("dim=%d", tree.Dim()) + } + best, _, ok := tree.Nearest([]float64{0, 0, 0, 0}) + if !ok { + t.Fatalf("no nearest") + } + // With these weights and inversions, peer B emerges as closest in this setup. + if best.ID != "B" { + t.Fatalf("expected best B, got %s", best.ID) + } +} diff --git a/mkdocs.yml b/mkdocs.yml index 3be25a6..d0c3179 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -57,6 +57,7 @@ nav: - Getting Started: getting-started.md - Examples: - Best Ping Peer (DHT): dht-best-ping.md + - Multi-Dimensional KDTree (DHT): kdtree-multidimensional.md - API Reference: api.md - License: license.md diff --git a/poindexter_test.go b/poindexter_test.go index 92445cf..4f03f01 100644 --- a/poindexter_test.go +++ b/poindexter_test.go @@ -7,8 +7,8 @@ func TestVersion(t *testing.T) { if version == "" { t.Error("Version should not be empty") } - if version != "0.1.0" { - t.Errorf("Expected version 0.1.0, got %s", version) + if version != "0.2.0" { + t.Errorf("Expected version 0.2.0, got %s", version) } }