Add multi-dimensional KDTree example and update documentation

2025-11-03 17:33:37 +00:00 · 2025-11-03 17:33:37 +00:00 · 5da17d8b61
commit 5da17d8b61
parent 736ce911e0
7 changed files with 538 additions and 2 deletions
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@ -123,4 +123,5 @@ func main() {

 - Check out the [API Reference](api.md) for detailed documentation
 - Try the example: [Find the best (lowest‑ping) DHT peer](dht-best-ping.md)
+- Explore a multi-dimensional KDTree search over ping, hops, geo distance, and score: [Multi-Dimensional KDTree (DHT)](kdtree-multidimensional.md)
 - Read about the [License](license.md)
--- a/docs/index.md
+++ b/docs/index.md
@ -52,3 +52,4 @@ Contributions are welcome! Please feel free to submit a Pull Request.
 ## Examples

 - Find the best (lowest‑ping) DHT peer using KDTree: [Best Ping Peer (DHT)](dht-best-ping.md)
+- Multi-dimensional neighbor search over ping, hops, geo, and score: [Multi-Dimensional KDTree (DHT)](kdtree-multidimensional.md)
--- a/docs/kdtree-multidimensional.md
+++ b/docs/kdtree-multidimensional.md
@ -0,0 +1,255 @@
+# KDTree: Multi‑Dimensional Search (DHT peers)
+
+This example extends the single‑dimension "best ping" demo to a realistic multi‑dimensional selection:
+
+- ping_ms (lower is better)
+- hop_count (lower is better)
+- geo_distance_km (lower is better)
+- score (higher is better — e.g., capacity/reputation)
+
+We will:
+- Build 4‑D points over these features
+- Run `Nearest`, `KNearest`, and `Radius` queries
+- Show subsets: ping+hop (2‑D) and ping+hop+geo (3‑D)
+- Demonstrate weighting/normalization to balance disparate units
+
+> Tip: KDTree distances are geometric. Mixing units (ms, hops, km, arbitrary score) requires scaling so that each axis contributes proportionally to your decision policy.
+
+## Dataset
+
+```go
+package main
+
+import (
+    "fmt"
+    poindexter "github.com/Snider/Poindexter"
+)
+
+// Peer is one candidate DHT peer together with the raw, un-normalized
+// features that the examples in this document rank on.
+type Peer struct {
+    ID        string
+    PingMS    float64 // milliseconds
+    Hops      float64 // hop count
+    GeoKM     float64 // crow‑flight distance in kilometers
+    Score     float64 // [0..1] trust/rep/capacity score (higher is better)
+}
+
+// peers is the sample candidate set used by the examples in this document.
+var peers = []Peer{
+    {ID: "A", PingMS: 22, Hops: 3, GeoKM: 1200, Score: 0.86},
+    {ID: "B", PingMS: 34, Hops: 2, GeoKM: 800,  Score: 0.91},
+    {ID: "C", PingMS: 15, Hops: 4, GeoKM: 4500, Score: 0.70},
+    {ID: "D", PingMS: 55, Hops: 1, GeoKM: 300,  Score: 0.95},
+    {ID: "E", PingMS: 18, Hops: 2, GeoKM: 2200, Score: 0.80},
+}
+```
+
+## Normalization and weights
+
+We scale raw features to comparable magnitudes and flip `Score` so lower is better. For demo simplicity we will:
+- Min‑max normalize each axis to [0,1] over the current candidate set
+- Convert `Score` to a cost: `score_cost = 1 - score`
+- Apply weights to emphasize certain axes
+
+Helper functions:
+
+```go
+// minMax walks xs once and reports its smallest and largest values.
+// An empty slice yields (0, 0).
+func minMax(xs []float64) (float64, float64) {
+    if len(xs) == 0 {
+        return 0, 0
+    }
+    lo, hi := xs[0], xs[0]
+    for i := 1; i < len(xs); i++ {
+        // A value cannot be both below lo and above hi, so one switch suffices.
+        switch x := xs[i]; {
+        case x < lo:
+            lo = x
+        case x > hi:
+            hi = x
+        }
+    }
+    return lo, hi
+}
+
+// scale01 linearly rescales v so that min maps to 0 and max maps to 1.
+// A degenerate range (min == max) maps everything to 0 instead of dividing by zero.
+func scale01(v, min, max float64) float64 {
+    if min != max {
+        return (v - min) / (max - min)
+    }
+    return 0
+}
+```
+
+Build 4‑D points:
+
+```go
+// Weights to balance axes (tune to taste)
+var wPing, wHop, wGeo, wScore = 1.0, 0.7, 0.2, 1.2
+
+// build4D turns every peer into a weighted 4‑D point (ping, hops, geo, score cost).
+// Each axis is min‑max normalized over the given peers; the score axis is flipped
+// (1 - normalized score) so that lower is better on every axis. The returned error
+// is always nil in this example.
+func build4D(peers []Peer) ([]poindexter.KDPoint[Peer], error) {
+    n := len(peers)
+    pingVals, hopVals := make([]float64, n), make([]float64, n)
+    geoVals, scoreVals := make([]float64, n), make([]float64, n)
+    for i := range peers {
+        pingVals[i] = peers[i].PingMS
+        hopVals[i] = peers[i].Hops
+        geoVals[i] = peers[i].GeoKM
+        scoreVals[i] = peers[i].Score
+    }
+    pingLo, pingHi := minMax(pingVals)
+    hopLo, hopHi := minMax(hopVals)
+    geoLo, geoHi := minMax(geoVals)
+    scoreLo, scoreHi := minMax(scoreVals)
+
+    out := make([]poindexter.KDPoint[Peer], 0, n)
+    for _, peer := range peers {
+        // Higher score is better, so convert it to a cost before weighting.
+        scoreCost := 1 - scale01(peer.Score, scoreLo, scoreHi)
+        coords := []float64{
+            wPing * scale01(peer.PingMS, pingLo, pingHi),
+            wHop * scale01(peer.Hops, hopLo, hopHi),
+            wGeo * scale01(peer.GeoKM, geoLo, geoHi),
+            wScore * scoreCost,
+        }
+        out = append(out, poindexter.KDPoint[Peer]{ID: peer.ID, Value: peer, Coords: coords})
+    }
+    return out, nil
+}
+```
+
+## 4‑D KDTree: Nearest, k‑NN, Radius
+
+```go
+// main builds the 4‑D tree over the sample peers and runs the three query
+// kinds (nearest, k‑nearest, radius) against one preference vector.
+func main() {
+    // Build 4‑D KDTree using Euclidean (L2)
+    pts, err := build4D(peers)
+    if err != nil {
+        fmt.Println("building points:", err)
+        return
+    }
+    tree, err := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{}))
+    if err != nil {
+        fmt.Println("building tree:", err)
+        return
+    }
+
+    // Query target preferences (you may construct a query in normalized/weighted space)
+    // Example: seek very low ping, low hops, moderate geo, high score (low score_cost)
+    query := []float64{wPing*0.0, wHop*0.2, wGeo*0.3, wScore*0.0}
+
+    // 1‑NN
+    best, dist, ok := tree.Nearest(query)
+    if ok {
+        fmt.Printf("Best peer: %s (dist=%.4f)\n", best.ID, dist)
+    }
+
+    // k‑NN (top 3)
+    neigh, dists := tree.KNearest(query, 3)
+    for i := range neigh {
+        fmt.Printf("%d) %s dist=%.4f\n", i+1, neigh[i].ID, dists[i])
+    }
+
+    // Radius query
+    within, wd := tree.Radius(query, 0.35)
+    fmt.Printf("Within radius 0.35: ")
+    for i := range within {
+        fmt.Printf("%s(%.3f) ", within[i].ID, wd[i])
+    }
+    fmt.Println()
+}
+```
+
+## 2‑D: Ping + Hop
+
+Sometimes you want a strict trade‑off between just latency and path length. Build 2‑D points using the same min‑max normalization helpers:
+
+```go
+var wPing2, wHop2 = 1.0, 1.0
+
+// build2D_pingHop maps each peer to a weighted 2‑D point (ping, hops), with both
+// axes min‑max normalized over the given peers.
+func build2D_pingHop(peers []Peer) []poindexter.KDPoint[Peer] {
+    n := len(peers)
+    pingVals, hopVals := make([]float64, n), make([]float64, n)
+    for i := range peers {
+        pingVals[i] = peers[i].PingMS
+        hopVals[i] = peers[i].Hops
+    }
+    pingLo, pingHi := minMax(pingVals)
+    hopLo, hopHi := minMax(hopVals)
+
+    out := make([]poindexter.KDPoint[Peer], 0, n)
+    for _, peer := range peers {
+        coords := []float64{
+            wPing2 * scale01(peer.PingMS, pingLo, pingHi),
+            wHop2 * scale01(peer.Hops, hopLo, hopHi),
+        }
+        out = append(out, poindexter.KDPoint[Peer]{ID: peer.ID, Value: peer, Coords: coords})
+    }
+    return out
+}
+
+// demo2D picks the best peer on ping and hops only, using Manhattan distance.
+func demo2D() {
+    pts := build2D_pingHop(peers)
+    // L1 favors axis‑aligned tradeoffs
+    tree, err := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.ManhattanDistance{}))
+    if err != nil {
+        fmt.Println("building 2D tree:", err)
+        return
+    }
+    // Prefer very low ping, modest hops
+    query := []float64{wPing2*0.0, wHop2*0.3}
+    best, _, ok := tree.Nearest(query)
+    if !ok {
+        // Without this check an empty tree would print a blank ID.
+        fmt.Println("2D best (ping+hop): no peers")
+        return
+    }
+    fmt.Println("2D best (ping+hop):", best.ID)
+}
+```
+
+## 3‑D: Ping + Hop + Geo
+
+Add geography to discourage far peers when latency is similar:
+
+```go
+var wPing3, wHop3, wGeo3 = 1.0, 0.7, 0.3
+
+// build3D_pingHopGeo maps each peer to a weighted 3‑D point (ping, hops, geo),
+// with every axis min‑max normalized over the given peers.
+func build3D_pingHopGeo(peers []Peer) []poindexter.KDPoint[Peer] {
+    n := len(peers)
+    pingVals, hopVals, geoVals := make([]float64, n), make([]float64, n), make([]float64, n)
+    for i := range peers {
+        pingVals[i] = peers[i].PingMS
+        hopVals[i] = peers[i].Hops
+        geoVals[i] = peers[i].GeoKM
+    }
+    pingLo, pingHi := minMax(pingVals)
+    hopLo, hopHi := minMax(hopVals)
+    geoLo, geoHi := minMax(geoVals)
+
+    out := make([]poindexter.KDPoint[Peer], 0, n)
+    for _, peer := range peers {
+        coords := []float64{
+            wPing3 * scale01(peer.PingMS, pingLo, pingHi),
+            wHop3 * scale01(peer.Hops, hopLo, hopHi),
+            wGeo3 * scale01(peer.GeoKM, geoLo, geoHi),
+        }
+        out = append(out, poindexter.KDPoint[Peer]{ID: peer.ID, Value: peer, Coords: coords})
+    }
+    return out
+}
+
+// demo3D picks the best peer on ping, hops and geo distance, using Euclidean distance.
+func demo3D() {
+    pts := build3D_pingHopGeo(peers)
+    tree, err := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{}))
+    if err != nil {
+        fmt.Println("building 3D tree:", err)
+        return
+    }
+    // Prefer low ping/hop, modest geo
+    query := []float64{wPing3*0.0, wHop3*0.2, wGeo3*0.4}
+    top, _, ok := tree.Nearest(query)
+    if !ok {
+        // Without this check an empty tree would print a blank ID.
+        fmt.Println("3D best (ping+hop+geo): no peers")
+        return
+    }
+    fmt.Println("3D best (ping+hop+geo):", top.ID)
+}
+```
+
+## Dynamic updates
+
+Your routing table changes constantly. Insert/remove peers without rebuilding:
+
+```go
+// updatesExample inserts a newly discovered peer into an existing 2‑D tree and
+// then removes it again by ID.
+func updatesExample() {
+    pts := build2D_pingHop(peers)
+    tree, err := poindexter.NewKDTree(pts)
+    if err != nil {
+        fmt.Println("building tree:", err)
+        return
+    }
+
+    // Recover the normalization bounds the tree's points were built with.
+    // In a real system, keep these around instead of recomputing them.
+    pings := make([]float64, len(peers))
+    hops := make([]float64, len(peers))
+    for i, p := range peers {
+        pings[i], hops[i] = p.PingMS, p.Hops
+    }
+    pMin, pMax := minMax(pings)
+    hMin, hMax := minMax(hops)
+
+    // Insert a new peer
+    newPeer := Peer{ID: "Z", PingMS: 12, Hops: 2, GeoKM: 900, Score: 0.88}
+    // Scale the new peer with the SAME bounds as the existing points. Normalizing it
+    // on its own (a one-element set) would hit min==max on every axis and collapse it
+    // to (0,0), which is not comparable with the points already in the tree.
+    // Values outside the original range simply land outside [0,1].
+    ptZ := poindexter.KDPoint[Peer]{
+        ID:    newPeer.ID,
+        Value: newPeer,
+        Coords: []float64{
+            wPing2 * scale01(newPeer.PingMS, pMin, pMax),
+            wHop2 * scale01(newPeer.Hops, hMin, hMax),
+        },
+    }
+    _ = tree.Insert(ptZ) // result ignored for brevity in this example
+
+    // Delete by ID when peer goes offline
+    _ = tree.DeleteByID("Z") // result ignored for brevity in this example
+}
+```
+
+## Choosing a metric
+
+- Euclidean (L2): smooth trade‑offs across axes; good default for blended preferences
+- Manhattan (L1): emphasizes per‑axis absolute differences; useful when each unit of ping/hop matters equally
+- Chebyshev (L∞): min‑max style; dominated by the worst axis (e.g., reject any peer with too many hops regardless of ping)
+
+## Notes on production use
+
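+- Keep and reuse normalization parameters (min/max or mean/std) instead of recomputing them per query; recomputing shifts the coordinate space, so distances drift and stop being comparable with points already in the tree.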
+- Consider capping outliers (e.g., clamp geo distances > 5000 km).
+- For large N (≥ 1e5) and low dims (≤ 8), consider swapping the internal engine to `gonum.org/v1/gonum/spatial/kdtree` behind the same API for faster queries.
--- a/kdtree_helpers.go
+++ b/kdtree_helpers.go
@ -0,0 +1,166 @@
+package poindexter
+
+// Helper builders for KDTree points with min-max normalization, optional inversion per-axis,
+// and per-axis weights. These are convenience utilities to make it easy to map domain
+// records into KD space for 2D/3D/4D use-cases.
+
+// minMax walks xs once and reports its smallest and largest values.
+// An empty slice yields (0, 0).
+func minMax(xs []float64) (float64, float64) {
+	if len(xs) == 0 {
+		return 0, 0
+	}
+	lo, hi := xs[0], xs[0]
+	for i := 1; i < len(xs); i++ {
+		// A value cannot be both below lo and above hi, so one switch suffices.
+		switch x := xs[i]; {
+		case x < lo:
+			lo = x
+		case x > hi:
+			hi = x
+		}
+	}
+	return lo, hi
+}
+
+// scale01 linearly rescales v so that min maps to 0 and max maps to 1.
+// A degenerate range (min == max) maps everything to 0 instead of dividing by zero.
+func scale01(v, min, max float64) float64 {
+	if min != max {
+		return (v - min) / (max - min)
+	}
+	return 0
+}
+
+// Build2D turns items into 2-D KD points whose coordinates are min-max normalized
+// over items, optionally inverted, and then weighted.
+//
+//   - id: yields the point ID for an item (may return "" if you don't need DeleteByID)
+//   - f1, f2: extract the raw value of each axis from an item
+//   - weights: per-axis multipliers applied after normalization (and inversion)
+//   - invert: per-axis flags; when true the normalized value n becomes 1-n, so that
+//     higher raw values become lower cost
+//
+// An empty items slice yields (nil, nil). An axis on which every item has the same
+// raw value normalizes to 0 before inversion.
+func Build2D[T any](items []T, id func(T) string, f1, f2 func(T) float64, weights [2]float64, invert [2]bool) ([]KDPoint[T], error) {
+	n := len(items)
+	if n == 0 {
+		return nil, nil
+	}
+	// Pull out the raw features first so each axis range is known before scaling.
+	raw1, raw2 := make([]float64, n), make([]float64, n)
+	for i := range items {
+		raw1[i] = f1(items[i])
+		raw2[i] = f2(items[i])
+	}
+	lo1, hi1 := minMax(raw1)
+	lo2, hi2 := minMax(raw2)
+
+	out := make([]KDPoint[T], 0, n)
+	for i, item := range items {
+		coords := []float64{
+			scale01(raw1[i], lo1, hi1),
+			scale01(raw2[i], lo2, hi2),
+		}
+		for axis := range coords {
+			if invert[axis] {
+				coords[axis] = 1 - coords[axis]
+			}
+			coords[axis] *= weights[axis]
+		}
+		out = append(out, KDPoint[T]{ID: id(item), Value: item, Coords: coords})
+	}
+	return out, nil
+}
+
+// Build3D turns items into 3-D KD points whose coordinates are min-max normalized
+// over items, optionally inverted (n becomes 1-n where invert is true), and then
+// multiplied by the per-axis weights. id yields each point's ID and f1..f3 extract
+// the raw axis values. An empty items slice yields (nil, nil).
+func Build3D[T any](items []T, id func(T) string, f1, f2, f3 func(T) float64, weights [3]float64, invert [3]bool) ([]KDPoint[T], error) {
+	n := len(items)
+	if n == 0 {
+		return nil, nil
+	}
+	// Pull out the raw features first so each axis range is known before scaling.
+	raw1, raw2, raw3 := make([]float64, n), make([]float64, n), make([]float64, n)
+	for i := range items {
+		raw1[i] = f1(items[i])
+		raw2[i] = f2(items[i])
+		raw3[i] = f3(items[i])
+	}
+	lo1, hi1 := minMax(raw1)
+	lo2, hi2 := minMax(raw2)
+	lo3, hi3 := minMax(raw3)
+
+	out := make([]KDPoint[T], 0, n)
+	for i, item := range items {
+		coords := []float64{
+			scale01(raw1[i], lo1, hi1),
+			scale01(raw2[i], lo2, hi2),
+			scale01(raw3[i], lo3, hi3),
+		}
+		for axis := range coords {
+			if invert[axis] {
+				coords[axis] = 1 - coords[axis]
+			}
+			coords[axis] *= weights[axis]
+		}
+		out = append(out, KDPoint[T]{ID: id(item), Value: item, Coords: coords})
+	}
+	return out, nil
+}
+
+// Build4D turns items into 4-D KD points whose coordinates are min-max normalized
+// over items, optionally inverted (n becomes 1-n where invert is true), and then
+// multiplied by the per-axis weights. id yields each point's ID and f1..f4 extract
+// the raw axis values. An empty items slice yields (nil, nil).
+func Build4D[T any](items []T, id func(T) string, f1, f2, f3, f4 func(T) float64, weights [4]float64, invert [4]bool) ([]KDPoint[T], error) {
+	n := len(items)
+	if n == 0 {
+		return nil, nil
+	}
+	// Pull out the raw features first so each axis range is known before scaling.
+	raw1, raw2 := make([]float64, n), make([]float64, n)
+	raw3, raw4 := make([]float64, n), make([]float64, n)
+	for i := range items {
+		raw1[i] = f1(items[i])
+		raw2[i] = f2(items[i])
+		raw3[i] = f3(items[i])
+		raw4[i] = f4(items[i])
+	}
+	lo1, hi1 := minMax(raw1)
+	lo2, hi2 := minMax(raw2)
+	lo3, hi3 := minMax(raw3)
+	lo4, hi4 := minMax(raw4)
+
+	out := make([]KDPoint[T], 0, n)
+	for i, item := range items {
+		coords := []float64{
+			scale01(raw1[i], lo1, hi1),
+			scale01(raw2[i], lo2, hi2),
+			scale01(raw3[i], lo3, hi3),
+			scale01(raw4[i], lo4, hi4),
+		}
+		for axis := range coords {
+			if invert[axis] {
+				coords[axis] = 1 - coords[axis]
+			}
+			coords[axis] *= weights[axis]
+		}
+		out = append(out, KDPoint[T]{ID: id(item), Value: item, Coords: coords})
+	}
+	return out, nil
+}
--- a/kdtree_helpers_test.go
+++ b/kdtree_helpers_test.go
@ -0,0 +1,112 @@
+package poindexter
+
+import (
+	"fmt"
+	"testing"
+)
+
+// TestBuild2D_NormalizationAndInversion checks that Build2D normalizes each axis
+// over the input range, inverts only the flagged axis, and applies the weights.
+func TestBuild2D_NormalizationAndInversion(t *testing.T) {
+	type rec struct{ a, b float64 }
+	// Axis a spans [0,10]; axis b spans [100,300].
+	items := []rec{{a: 0, b: 100}, {a: 10, b: 300}}
+	noID := func(rec) string { return "" }
+	axisA := func(r rec) float64 { return r.a }
+	axisB := func(r rec) float64 { return r.b }
+
+	// Invert the first axis only.
+	pts, err := Build2D(items, noID, axisA, axisB, [2]float64{2.0, 0.5}, [2]bool{true, false})
+	if err != nil {
+		t.Fatalf("Build2D err: %v", err)
+	}
+	if n := len(pts); n != 2 {
+		t.Fatalf("expected 2 points, got %d", n)
+	}
+
+	render := func(p KDPoint[rec]) string {
+		return fmt.Sprintf("%.1f,%.1f", p.Coords[0], p.Coords[1])
+	}
+	// item0: a=0 -> 0 -> inverted 1 -> *2 = 2; b=100 -> 0 -> *0.5 = 0
+	if got := render(pts[0]); got != "2.0,0.0" {
+		t.Fatalf("coords[0] = %s, want 2.0,0.0", got)
+	}
+	// item1: a=10 -> 1 -> inverted 0 -> *2 = 0; b=300 -> 1 -> *0.5 = 0.5
+	if got := render(pts[1]); got != "0.0,0.5" {
+		t.Fatalf("coords[1] = %s, want 0.0,0.5", got)
+	}
+}
+
+// TestBuild3D_AllEqualSafe checks that Build3D copes with axes where every item
+// has the same value (min == max): all coordinates must come out as 0.
+func TestBuild3D_AllEqualSafe(t *testing.T) {
+	type rec struct{ x, y, z float64 }
+	items := []rec{{1, 1, 1}, {1, 1, 1}}
+	constID := func(rec) string { return "id" }
+	getX := func(r rec) float64 { return r.x }
+	getY := func(r rec) float64 { return r.y }
+	getZ := func(r rec) float64 { return r.z }
+
+	pts, err := Build3D(items, constID, getX, getY, getZ, [3]float64{1, 1, 1}, [3]bool{false, false, false})
+	if err != nil {
+		t.Fatalf("Build3D err: %v", err)
+	}
+	if n := len(pts); n != 2 {
+		t.Fatalf("len = %d", n)
+	}
+	for _, p := range pts {
+		if dim := len(p.Coords); dim != 3 {
+			t.Fatalf("dim = %d", dim)
+		}
+		for j := 0; j < len(p.Coords); j++ {
+			if c := p.Coords[j]; c != 0 {
+				t.Fatalf("expected 0 when min==max, got %v", c)
+			}
+		}
+	}
+}
+
+// TestBuild4D_EndToEnd_Example runs the documented Peer dataset through Build4D
+// and a KDTree, then checks which peer is nearest to the origin.
+func TestBuild4D_EndToEnd_Example(t *testing.T) {
+	type Peer struct {
+		ID     string
+		PingMS float64
+		Hops   float64
+		GeoKM  float64
+		Score  float64
+	}
+	peers := []Peer{
+		{ID: "A", PingMS: 22, Hops: 3, GeoKM: 1200, Score: 0.86},
+		{ID: "B", PingMS: 34, Hops: 2, GeoKM: 800, Score: 0.91},
+		{ID: "C", PingMS: 15, Hops: 4, GeoKM: 4500, Score: 0.70},
+		{ID: "D", PingMS: 55, Hops: 1, GeoKM: 300, Score: 0.95},
+		{ID: "E", PingMS: 18, Hops: 2, GeoKM: 2200, Score: 0.80},
+	}
+
+	peerID := func(p Peer) string { return p.ID }
+	ping := func(p Peer) float64 { return p.PingMS }
+	hops := func(p Peer) float64 { return p.Hops }
+	geo := func(p Peer) float64 { return p.GeoKM }
+	score := func(p Peer) float64 { return p.Score }
+
+	pts, err := Build4D(peers, peerID, ping, hops, geo, score,
+		[4]float64{1.0, 0.7, 0.2, 1.2},
+		[4]bool{false, false, false, true}, // flip score so higher score -> lower cost
+	)
+	if err != nil {
+		t.Fatalf("Build4D err: %v", err)
+	}
+	if got := len(pts); got != len(peers) {
+		t.Fatalf("len pts=%d", got)
+	}
+
+	// Query at the origin of the normalized/weighted space, i.e. prefer the
+	// minimum on every axis.
+	tree, err := NewKDTree(pts, WithMetric(EuclideanDistance{}))
+	if err != nil {
+		t.Fatalf("NewKDTree err: %v", err)
+	}
+	if dim := tree.Dim(); dim != 4 {
+		t.Fatalf("dim=%d", dim)
+	}
+	origin := make([]float64, 4)
+	nearest, _, found := tree.Nearest(origin)
+	if !found {
+		t.Fatalf("no nearest")
+	}
+	// With these weights and inversions, peer B is the closest to the origin.
+	if nearest.ID != "B" {
+		t.Fatalf("expected best B, got %s", nearest.ID)
+	}
+}
--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -57,6 +57,7 @@ nav:
  - Getting Started: getting-started.md
  - Examples:
      - Best Ping Peer (DHT): dht-best-ping.md
+      - Multi-Dimensional KDTree (DHT): kdtree-multidimensional.md
  - API Reference: api.md
  - License: license.md

--- a/poindexter_test.go
+++ b/poindexter_test.go
@ -7,8 +7,8 @@ func TestVersion(t *testing.T) {
 	if version == "" {
 		t.Error("Version should not be empty")
 	}
-	if version != "0.1.0" {
-		t.Errorf("Expected version 0.1.0, got %s", version)
+	if version != "0.2.0" {
+		t.Errorf("Expected version 0.2.0, got %s", version)
 	}
 }