diff --git a/README.md b/README.md index f5b9b50..4ab8321 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ A Go library package providing utility functions including sorting algorithms wi - 🔢 **Sorting Utilities**: Sort integers, strings, and floats in ascending or descending order - 🎯 **Custom Sorting**: Sort any type with custom comparison functions or key extractors - 🔍 **Binary Search**: Fast search on sorted data +- 🧭 **KDTree (NN Search)**: Build a KDTree over points with generic payloads; nearest, k-NN, and radius queries with Euclidean, Manhattan, or Chebyshev metrics - 📦 **Generic Functions**: Type-safe operations using Go generics - ✅ **Well-Tested**: Comprehensive test coverage - 📖 **Documentation**: Full documentation available at GitHub Pages @@ -24,7 +25,7 @@ package main import ( "fmt" - "github.com/Snider/Poindexter" + poindexter "github.com/Snider/Poindexter" ) func main() { @@ -38,16 +39,19 @@ func main() { Name string Price float64 } - - products := []Product{ - {"Apple", 1.50}, - {"Banana", 0.75}, - {"Cherry", 3.00}, + + products := []Product{{"Apple", 1.50}, {"Banana", 0.75}, {"Cherry", 3.00}} + poindexter.SortByKey(products, func(p Product) float64 { return p.Price }) + + // KDTree quick demo + pts := []poindexter.KDPoint[string]{ + {ID: "A", Coords: []float64{0, 0}, Value: "alpha"}, + {ID: "B", Coords: []float64{1, 0}, Value: "bravo"}, + {ID: "C", Coords: []float64{0, 1}, Value: "charlie"}, } - - poindexter.SortByKey(products, func(p Product) float64 { - return p.Price - }) + tree, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{})) + nearest, dist, _ := tree.Nearest([]float64{0.9, 0.1}) + fmt.Println(nearest.ID, nearest.Value, dist) // B bravo ~0.141... 
} ``` diff --git a/docs/dht-best-ping.md b/docs/dht-best-ping.md new file mode 100644 index 0000000..7dca5a6 --- /dev/null +++ b/docs/dht-best-ping.md @@ -0,0 +1,114 @@ +# Example: Find the best (lowest‑ping) peer in a DHT table + +This example shows how to model a "made up" DHT routing table and use Poindexter's `KDTree` to quickly find: + +- the single best peer by ping (nearest neighbor) +- the top N best peers by ping (k‑nearest neighbors) +- all peers under a ping threshold (radius search) + +We keep it simple by mapping each peer to a 1‑dimensional coordinate: its ping in milliseconds. Using 1D means the KDTree's distance is just the absolute difference between pings. + +> Tip: In a real system, you might expand to multiple dimensions (e.g., `[ping_ms, hop_count, geo_distance, score]`) and choose a metric (`L1`, `L2`, or `L∞`) that best matches your routing heuristic. + +--- + +## Full example + +```go +package main + +import ( + "fmt" + poindexter "github.com/Snider/Poindexter" +) + +// Peer is our DHT peer entry (made up for this example). +type Peer struct { + Addr string // multiaddr or host:port + Ping int // measured ping in milliseconds +} + +func main() { + // A toy DHT routing table with made-up ping values + table := []Peer{ + {Addr: "peer1.example:4001", Ping: 74}, + {Addr: "peer2.example:4001", Ping: 52}, + {Addr: "peer3.example:4001", Ping: 110}, + {Addr: "peer4.example:4001", Ping: 35}, + {Addr: "peer5.example:4001", Ping: 60}, + {Addr: "peer6.example:4001", Ping: 44}, + } + + // Map peers to KD points in 1D where coordinate = ping (ms). + // Use stable string IDs so we can delete/update later. + pts := make([]poindexter.KDPoint[Peer], 0, len(table)) + for i, p := range table { + pts = append(pts, poindexter.KDPoint[Peer]{ + ID: fmt.Sprintf("peer-%d", i+1), + Coords: []float64{float64(p.Ping)}, + Value: p, + }) + } + + // Build a KDTree. Euclidean metric is fine for 1D ping comparisons. 
+ kdt, err := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{})) + if err != nil { + panic(err) + } + + // 1) Find the best (lowest-ping) peer. + // Query is a 1D point representing desired ping target. Using 0 finds the min. + best, d, ok := kdt.Nearest([]float64{0}) + if !ok { + fmt.Println("no peers found") + return + } + fmt.Printf("Best peer: %s (ping=%d ms), distance=%.0f\n", best.Value.Addr, best.Value.Ping, d) + // Example output: Best peer: peer4.example:4001 (ping=35 ms), distance=35 + + // 2) Top-N best peers by ping. + top, dists := kdt.KNearest([]float64{0}, 3) + fmt.Println("Top 3 peers by ping:") + for i := range top { + fmt.Printf(" #%d %s (ping=%d ms), distance=%.0f\n", i+1, top[i].Value.Addr, top[i].Value.Ping, dists[i]) + } + + // 3) All peers under a threshold (e.g., <= 50 ms): radius search. + within, wd := kdt.Radius([]float64{0}, 50) + fmt.Println("Peers with ping <= 50 ms:") + for i := range within { + fmt.Printf(" %s (ping=%d ms), distance=%.0f\n", within[i].Value.Addr, within[i].Value.Ping, wd[i]) + } + + // 4) Dynamic updates: if a peer improves ping, we can delete & re-insert with a new ID + // (or keep the same ID and just update the point if your application tracks indices). + // Here we simulate peer5 dropping from 60 ms to 30 ms. + if kdt.DeleteByID("peer-5") { + improved := poindexter.KDPoint[Peer]{ + ID: "peer-5", // keep the same ID for simplicity + Coords: []float64{30}, + Value: Peer{Addr: "peer5.example:4001", Ping: 30}, + } + _ = kdt.Insert(improved) + } + + // Recompute the best after update + best2, d2, _ := kdt.Nearest([]float64{0}) + fmt.Printf("After update, best peer: %s (ping=%d ms), distance=%.0f\n", best2.Value.Addr, best2.Value.Ping, d2) +} +``` + +### Why does querying with `[0]` work? +We use Euclidean distance in 1D, so `distance = |ping - target|`. With target `0`, minimizing the distance is equivalent to minimizing the ping itself. 
+ +### Extending the metric/space +- Multi-objective: encode more routing features (lower is better) as extra dimensions, e.g. `[ping_ms, hops, queue_delay_ms]`. +- Metric choice: + - `EuclideanDistance` (L2): balances outliers smoothly. + - `ManhattanDistance` (L1): linear penalty; robust for sparsity. + - `ChebyshevDistance` (L∞): cares about the worst dimension. +- Normalization: when mixing units (ms, hops, km), normalize or weight dimensions so the metric reflects your priority. + +### Notes +- This KDTree currently uses an internal linear scan for queries. The API is stable and designed so it can be swapped to use `gonum.org/v1/gonum/spatial/kdtree` under the hood later for sub-linear queries on large datasets. +- IDs are optional but recommended for O(1)-style deletes; keep them unique per tree. diff --git a/docs/getting-started.md b/docs/getting-started.md index d37fa63..2b82a86 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -122,4 +122,5 @@ func main() { ## Next Steps - Check out the [API Reference](api.md) for detailed documentation +- Try the example: [Find the best (lowest‑ping) DHT peer](dht-best-ping.md) - Read about the [License](license.md) diff --git a/docs/index.md b/docs/index.md index 9ce37bb..4743455 100644 --- a/docs/index.md +++ b/docs/index.md @@ -47,3 +47,8 @@ This project is licensed under the European Union Public Licence v1.2 (EUPL-1.2) ## Contributing Contributions are welcome! Please feel free to submit a Pull Request. + + +## Examples + +- Find the best (lowest‑ping) DHT peer using KDTree: [Best Ping Peer (DHT)](dht-best-ping.md) diff --git a/kdtree.go b/kdtree.go new file mode 100644 index 0000000..61c0399 --- /dev/null +++ b/kdtree.go @@ -0,0 +1,240 @@ +package poindexter + +import ( + "errors" + "math" + "sort" +) + +// KDPoint represents a point with coordinates and an attached payload/value. +// ID should be unique within a tree to enable O(1) deletes by ID. 
// Coords must all have the same dimensionality within a given KDTree.
// The tree keeps the Coords slice it is given (no deep copy is made), so the
// caller must not modify it while the point is stored in a tree.
type KDPoint[T any] struct {
	ID     string
	Coords []float64
	Value  T
}

// DistanceMetric defines a metric over R^n.
//
// KDTree only calls Distance with two slices of the tree's dimensionality, so
// implementations may assume len(a) == len(b).
type DistanceMetric interface {
	Distance(a, b []float64) float64
}

// EuclideanDistance implements the L2 metric.
type EuclideanDistance struct{}

// Distance returns the square root of the summed squared coordinate
// differences. It iterates over a, so it panics if b is shorter than a.
func (EuclideanDistance) Distance(a, b []float64) float64 {
	var sum float64
	for i := range a {
		d := a[i] - b[i]
		sum += d * d
	}
	return math.Sqrt(sum)
}

// ManhattanDistance implements the L1 metric.
type ManhattanDistance struct{}

// Distance returns the sum of absolute coordinate differences. It iterates
// over a, so it panics if b is shorter than a.
func (ManhattanDistance) Distance(a, b []float64) float64 {
	var sum float64
	for i := range a {
		sum += math.Abs(a[i] - b[i])
	}
	return sum
}

// ChebyshevDistance implements the L-infinity (max) metric.
type ChebyshevDistance struct{}

// Distance returns the largest absolute coordinate difference. It iterates
// over a, so it panics if b is shorter than a.
func (ChebyshevDistance) Distance(a, b []float64) float64 {
	var largest float64
	for i := range a {
		if d := math.Abs(a[i] - b[i]); d > largest {
			largest = d
		}
	}
	return largest
}

// KDOption configures KDTree construction (non-generic to allow inference).
type KDOption func(*kdOptions)

// kdOptions collects the settings that KDOption values may override.
type kdOptions struct {
	metric DistanceMetric
}

// WithMetric sets the distance metric for the KDTree.
// A nil metric is ignored, leaving the default (EuclideanDistance) in place,
// so that queries never dereference a nil metric.
func WithMetric(m DistanceMetric) KDOption {
	return func(o *kdOptions) {
		if m != nil {
			o.metric = m
		}
	}
}

// KDTree is a lightweight wrapper providing nearest-neighbor operations.
// Note: This implementation currently uses linear scans for queries
// and is designed to be easily swappable with gonum.org/v1/gonum/spatial/kdtree
// in the future without breaking the public API.
type KDTree[T any] struct {
	points  []KDPoint[T]
	dim     int
	metric  DistanceMetric
	idIndex map[string]int // non-empty point ID -> index into points
}

// kdHit pairs the index of a stored point with its distance to a query.
type kdHit struct {
	idx  int
	dist float64
}

// kdSortHits orders hits by ascending distance. The sort is stable, so points
// at equal distance keep their storage order and results are reproducible.
func kdSortHits(hits []kdHit) {
	sort.SliceStable(hits, func(i, j int) bool { return hits[i].dist < hits[j].dist })
}

// NewKDTree builds a KDTree from the given points.
// All points must have the same dimensionality (>0).
//
// It returns an error when pts is empty, when the first point has no
// coordinates, when the points disagree on dimensionality, or when two points
// share a non-empty ID. The pts slice itself is copied; the Coords slices
// inside each point are shared with the caller. Nil options are skipped and
// the metric defaults to EuclideanDistance.
func NewKDTree[T any](pts []KDPoint[T], opts ...KDOption) (*KDTree[T], error) {
	if len(pts) == 0 {
		return nil, errors.New("no points provided")
	}
	dim := len(pts[0].Coords)
	if dim == 0 {
		return nil, errors.New("points must have at least one dimension")
	}
	idIndex := make(map[string]int, len(pts))
	for i := range pts {
		p := &pts[i]
		if len(p.Coords) != dim {
			return nil, errors.New("inconsistent dimensionality in points")
		}
		if p.ID == "" {
			continue // anonymous points are stored but not indexed
		}
		if _, exists := idIndex[p.ID]; exists {
			return nil, errors.New("duplicate point ID: " + p.ID)
		}
		idIndex[p.ID] = i
	}
	cfg := kdOptions{metric: EuclideanDistance{}}
	for _, o := range opts {
		if o != nil {
			o(&cfg)
		}
	}
	if cfg.metric == nil {
		// Guard against an option that cleared the metric.
		cfg.metric = EuclideanDistance{}
	}
	return &KDTree[T]{
		points:  append([]KDPoint[T](nil), pts...),
		dim:     dim,
		metric:  cfg.metric,
		idIndex: idIndex,
	}, nil
}

// Dim returns the number of dimensions.
func (t *KDTree[T]) Dim() int { return t.dim }

// Len returns the number of points in the tree.
func (t *KDTree[T]) Len() int { return len(t.points) }

// Nearest returns the closest point to the query, along with its distance.
// ok is false if the tree is empty or the query has the wrong dimensionality.
//
// Every stored point is scanned. On ties the earliest stored point wins. A
// non-empty tree always yields a point, even when every distance is +Inf or
// NaN; a NaN distance never displaces a comparable one.
func (t *KDTree[T]) Nearest(query []float64) (KDPoint[T], float64, bool) {
	if len(query) != t.dim || len(t.points) == 0 {
		return KDPoint[T]{}, 0, false
	}
	bestIdx := -1
	var bestDist float64
	for i := range t.points {
		d := t.metric.Distance(query, t.points[i].Coords)
		// Seed with the first point instead of a sentinel so that +Inf
		// distances (e.g. overflow in the metric) are still candidates.
		if bestIdx < 0 || d < bestDist || (math.IsNaN(bestDist) && !math.IsNaN(d)) {
			bestIdx, bestDist = i, d
		}
	}
	return t.points[bestIdx], bestDist, true
}

// KNearest returns up to k nearest neighbors to the query in ascending distance order.
// It returns nil slices when k <= 0, the query has the wrong dimensionality,
// or the tree is empty. Points at equal distance are returned in storage order.
func (t *KDTree[T]) KNearest(query []float64, k int) ([]KDPoint[T], []float64) {
	if k <= 0 || len(query) != t.dim || len(t.points) == 0 {
		return nil, nil
	}
	hits := make([]kdHit, len(t.points))
	for i := range t.points {
		hits[i] = kdHit{idx: i, dist: t.metric.Distance(query, t.points[i].Coords)}
	}
	kdSortHits(hits)
	if k < len(hits) {
		hits = hits[:k]
	}
	return t.materialize(hits)
}

// Radius returns points within radius r (inclusive) from the query, sorted by distance.
// It returns nil slices when r < 0, the query has the wrong dimensionality, or
// the tree is empty; when nothing lies within r the returned slices are empty.
// Points at equal distance are returned in storage order.
func (t *KDTree[T]) Radius(query []float64, r float64) ([]KDPoint[T], []float64) {
	if r < 0 || len(query) != t.dim || len(t.points) == 0 {
		return nil, nil
	}
	var hits []kdHit
	for i := range t.points {
		if d := t.metric.Distance(query, t.points[i].Coords); d <= r {
			hits = append(hits, kdHit{idx: i, dist: d})
		}
	}
	kdSortHits(hits)
	return t.materialize(hits)
}

// materialize converts hits into parallel slices of points and distances,
// preserving the order of hits.
func (t *KDTree[T]) materialize(hits []kdHit) ([]KDPoint[T], []float64) {
	neighbors := make([]KDPoint[T], len(hits))
	dists := make([]float64, len(hits))
	for i, h := range hits {
		neighbors[i] = t.points[h.idx]
		dists[i] = h.dist
	}
	return neighbors, dists
}

// Insert adds a point. Returns false if dimensionality mismatch or duplicate ID exists.
// The point's Coords slice is stored as given, not copied.
func (t *KDTree[T]) Insert(p KDPoint[T]) bool {
	if len(p.Coords) != t.dim {
		return false
	}
	if p.ID != "" {
		if _, exists := t.idIndex[p.ID]; exists {
			return false
		}
		// Record the slot the point is about to occupy.
		t.idIndex[p.ID] = len(t.points)
	}
	t.points = append(t.points, p)
	return true
}

// DeleteByID removes a point by its ID. Returns false if not found or ID empty.
+func (t *KDTree[T]) DeleteByID(id string) bool { + if id == "" { + return false + } + idx, ok := t.idIndex[id] + if !ok { + return false + } + last := len(t.points) - 1 + // swap delete + t.points[idx] = t.points[last] + if t.points[idx].ID != "" { + t.idIndex[t.points[idx].ID] = idx + } + t.points = t.points[:last] + delete(t.idIndex, id) + return true +} diff --git a/kdtree_test.go b/kdtree_test.go new file mode 100644 index 0000000..24eef6e --- /dev/null +++ b/kdtree_test.go @@ -0,0 +1,109 @@ +package poindexter + +import ( + "testing" +) + +func samplePoints() []KDPoint[string] { + return []KDPoint[string]{ + {ID: "A", Coords: []float64{0, 0}, Value: "alpha"}, + {ID: "B", Coords: []float64{1, 0}, Value: "bravo"}, + {ID: "C", Coords: []float64{0, 1}, Value: "charlie"}, + {ID: "D", Coords: []float64{1, 1}, Value: "delta"}, + {ID: "E", Coords: []float64{2, 2}, Value: "echo"}, + } +} + +func TestKDTree_Nearest(t *testing.T) { + pts := samplePoints() + tree, err := NewKDTree(pts, WithMetric(EuclideanDistance{})) + if err != nil { + t.Fatalf("NewKDTree error: %v", err) + } + + p, dist, ok := tree.Nearest([]float64{0.9, 0.9}) + if !ok { + t.Fatalf("expected a nearest neighbor") + } + if p.ID != "D" { + t.Fatalf("expected D, got %s", p.ID) + } + if dist <= 0 { + t.Fatalf("expected positive distance, got %v", dist) + } +} + +func TestKDTree_KNearest(t *testing.T) { + pts := samplePoints() + tree, err := NewKDTree(pts, WithMetric(ManhattanDistance{})) + if err != nil { + t.Fatalf("NewKDTree error: %v", err) + } + + neighbors, dists := tree.KNearest([]float64{0.9, 0.9}, 3) + if len(neighbors) != 3 || len(dists) != 3 { + t.Fatalf("expected 3 neighbors, got %d", len(neighbors)) + } + if neighbors[0].ID != "D" { + t.Fatalf("expected first neighbor D, got %s", neighbors[0].ID) + } +} + +func TestKDTree_Radius(t *testing.T) { + pts := samplePoints() + tree, err := NewKDTree(pts, WithMetric(EuclideanDistance{})) + if err != nil { + t.Fatalf("NewKDTree error: %v", err) + } + + 
neighbors, dists := tree.Radius([]float64{0, 0}, 1.01) + if len(neighbors) < 2 { + t.Fatalf("expected at least 2 neighbors within radius, got %d", len(neighbors)) + } + // distances should be non-decreasing + for i := 1; i < len(dists); i++ { + if dists[i] < dists[i-1] { + t.Fatalf("distances not sorted: %v", dists) + } + } +} + +func TestKDTree_InsertDelete(t *testing.T) { + pts := samplePoints() + tree, err := NewKDTree(pts) + if err != nil { + t.Fatalf("NewKDTree error: %v", err) + } + // Insert a new close point near (0,0) + ok := tree.Insert(KDPoint[string]{ID: "Z", Coords: []float64{0.05, 0.05}, Value: "zulu"}) + if !ok { + t.Fatalf("insert failed") + } + p, _, found := tree.Nearest([]float64{0.04, 0.04}) + if !found || p.ID != "Z" { + t.Fatalf("expected nearest to be Z after insert, got %+v", p) + } + + // Delete and verify nearest changes back + if !tree.DeleteByID("Z") { + t.Fatalf("delete failed") + } + p, _, found = tree.Nearest([]float64{0.04, 0.04}) + if !found || p.ID != "A" { + t.Fatalf("expected nearest to be A after delete, got %+v", p) + } +} + +func TestKDTree_DimAndLen(t *testing.T) { + pts := samplePoints() + tree, err := NewKDTree(pts) + if err != nil { + t.Fatalf("NewKDTree error: %v", err) + } + if tree.Len() != len(pts) { + t.Fatalf("Len mismatch: %d vs %d", tree.Len(), len(pts)) + } + if tree.Dim() != 2 { + t.Fatalf("Dim mismatch: %d", tree.Dim()) + } +} diff --git a/mkdocs.yml b/mkdocs.yml index 7d71e94..3be25a6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -55,6 +55,8 @@ markdown_extensions: nav: - Home: index.md - Getting Started: getting-started.md + - Examples: + - Best Ping Peer (DHT): dht-best-ping.md - API Reference: api.md - License: license.md diff --git a/poindexter.go b/poindexter.go index 01e8aa8..9b7f7ac 100644 --- a/poindexter.go +++ b/poindexter.go @@ -3,7 +3,7 @@ package poindexter // Version returns the current version of the library. 
func Version() string { - return "0.1.0" + return "0.2.0" } // Hello returns a greeting message.