Add KDTree implementation and example for finding the best DHT peer by ping

This commit is contained in:
Snider 2025-11-03 16:49:31 +00:00
parent dbcad82d6b
commit 736ce911e0
8 changed files with 486 additions and 11 deletions

View file

@ -7,6 +7,7 @@ A Go library package providing utility functions including sorting algorithms wi
- 🔢 **Sorting Utilities**: Sort integers, strings, and floats in ascending or descending order - 🔢 **Sorting Utilities**: Sort integers, strings, and floats in ascending or descending order
- 🎯 **Custom Sorting**: Sort any type with custom comparison functions or key extractors - 🎯 **Custom Sorting**: Sort any type with custom comparison functions or key extractors
- 🔍 **Binary Search**: Fast search on sorted data - 🔍 **Binary Search**: Fast search on sorted data
- 🧭 **KDTree (NN Search)**: Build a KDTree over points with generic payloads; nearest, k-NN, and radius queries with Euclidean, Manhattan, or Chebyshev metrics
- 📦 **Generic Functions**: Type-safe operations using Go generics - 📦 **Generic Functions**: Type-safe operations using Go generics
- ✅ **Well-Tested**: Comprehensive test coverage - ✅ **Well-Tested**: Comprehensive test coverage
- 📖 **Documentation**: Full documentation available at GitHub Pages - 📖 **Documentation**: Full documentation available at GitHub Pages
@ -24,7 +25,7 @@ package main
import ( import (
"fmt" "fmt"
"github.com/Snider/Poindexter" poindexter "github.com/Snider/Poindexter"
) )
func main() { func main() {
@ -38,16 +39,19 @@ func main() {
Name string Name string
Price float64 Price float64
} }
products := []Product{ products := []Product{{"Apple", 1.50}, {"Banana", 0.75}, {"Cherry", 3.00}}
{"Apple", 1.50}, poindexter.SortByKey(products, func(p Product) float64 { return p.Price })
{"Banana", 0.75},
{"Cherry", 3.00}, // KDTree quick demo
pts := []poindexter.KDPoint[string]{
{ID: "A", Coords: []float64{0, 0}, Value: "alpha"},
{ID: "B", Coords: []float64{1, 0}, Value: "bravo"},
{ID: "C", Coords: []float64{0, 1}, Value: "charlie"},
} }
tree, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{}))
poindexter.SortByKey(products, func(p Product) float64 { nearest, dist, _ := tree.Nearest([]float64{0.9, 0.1})
return p.Price fmt.Println(nearest.ID, nearest.Value, dist) // B bravo ~0.141...
})
} }
``` ```

114
docs/dht-best-ping.md Normal file
View file

@ -0,0 +1,114 @@
# Example: Find the best (lowest-ping) peer in a DHT table
This example shows how to model a "made up" DHT routing table and use Poindexter's `KDTree` to quickly find:
- the single best peer by ping (nearest neighbor)
- the top N best peers by ping (k-nearest neighbors)
- all peers under a ping threshold (radius search)
We keep it simple by mapping each peer to a 1-dimensional coordinate: its ping in milliseconds. Using 1D means the KDTree's distance is just the absolute difference between pings.
> Tip: In a real system, you might expand to multiple dimensions (e.g., `[ping_ms, hop_count, geo_distance, score]`) and choose a metric (`L1`, `L2`, or `L∞`) that best matches your routing heuristic.
---
## Full example
```go
package main
import (
"fmt"
poindexter "github.com/Snider/Poindexter"
)
// Peer is a made-up DHT routing-table entry used only by this example.
// Its Ping is what the example turns into the peer's single KD coordinate,
// and the whole struct is stored as the point's Value payload.
type Peer struct {
Addr string // multiaddr or host:port
Ping int // measured ping in milliseconds
}
// main builds a 1D KDTree keyed by ping and walks through the four operations
// the example covers: nearest, k-nearest, radius search, and delete/insert.
func main() {
// A toy DHT routing table with made-up ping values
table := []Peer{
{Addr: "peer1.example:4001", Ping: 74},
{Addr: "peer2.example:4001", Ping: 52},
{Addr: "peer3.example:4001", Ping: 110},
{Addr: "peer4.example:4001", Ping: 35},
{Addr: "peer5.example:4001", Ping: 60},
{Addr: "peer6.example:4001", Ping: 44},
}
// Map peers to KD points in 1D where coordinate = ping (ms).
// Use stable string IDs so we can delete/update later.
pts := make([]poindexter.KDPoint[Peer], 0, len(table))
for i, p := range table {
pts = append(pts, poindexter.KDPoint[Peer]{
ID: fmt.Sprintf("peer-%d", i+1),
Coords: []float64{float64(p.Ping)},
Value: p,
})
}
// Build a KDTree. Euclidean metric is fine for 1D ping comparisons.
kdt, err := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{}))
if err != nil {
panic(err)
}
// 1) Find the best (lowest-ping) peer.
// Query is a 1D point representing the desired ping target. Because every
// ping in the table is >= 0, the point closest to 0 is the minimum ping.
best, d, ok := kdt.Nearest([]float64{0})
if !ok {
fmt.Println("no peers found")
return
}
fmt.Printf("Best peer: %s (ping=%d ms), distance=%.0f\n", best.Value.Addr, best.Value.Ping, d)
// Example output: Best peer: peer4.example:4001 (ping=35 ms), distance=35
// 2) Top-N best peers by ping (results come back in ascending distance order).
top, dists := kdt.KNearest([]float64{0}, 3)
fmt.Println("Top 3 peers by ping:")
for i := range top {
fmt.Printf(" #%d %s (ping=%d ms), distance=%.0f\n", i+1, top[i].Value.Addr, top[i].Value.Ping, dists[i])
}
// 3) All peers under a threshold (e.g., <= 50 ms): radius search.
// The radius is inclusive, so a peer at exactly 50 ms would be returned.
within, wd := kdt.Radius([]float64{0}, 50)
fmt.Println("Peers with ping <= 50 ms:")
for i := range within {
fmt.Printf(" %s (ping=%d ms), distance=%.0f\n", within[i].Value.Addr, within[i].Value.Ping, wd[i])
}
// 4) Dynamic updates: when a peer's ping changes, delete its old point and
// insert a replacement carrying the new coordinate. Reusing the same ID is
// fine because DeleteByID releases it first; Insert would otherwise reject
// the point as a duplicate.
// Here we simulate peer5 dropping from 60 ms to 30 ms.
if kdt.DeleteByID("peer-5") {
improved := poindexter.KDPoint[Peer]{
ID: "peer-5", // keep the same ID for simplicity
Coords: []float64{30},
Value: Peer{Addr: "peer5.example:4001", Ping: 30},
}
// Insert reports false on a duplicate ID or wrong dimensionality; neither
// can happen here, so the result is deliberately ignored.
_ = kdt.Insert(improved)
}
// Recompute the best after update
best2, d2, _ := kdt.Nearest([]float64{0})
fmt.Printf("After update, best peer: %s (ping=%d ms), distance=%.0f\n", best2.Value.Addr, best2.Value.Ping, d2)
}
```
### Why does querying with `[0]` work?
We use Euclidean distance in 1D, so `distance = |ping - target|`. Pings are never negative, so with target `0` the distance is simply the ping, and minimizing the distance is equivalent to minimizing the ping itself.
### Extending the metric/space
- Multi-objective: encode more routing features (lower is better) as extra dimensions, e.g. `[ping_ms, hops, queue_delay_ms]`.
- Metric choice:
- `EuclideanDistance` (L2): balances outliers smoothly.
- `ManhattanDistance` (L1): linear penalty; robust for sparsity.
- `ChebyshevDistance` (L∞): cares about the worst dimension.
- Normalization: when mixing units (ms, hops, km), normalize or weight dimensions so the metric reflects your priority.
### Notes
- This KDTree currently uses an internal linear scan for queries. The API is stable and designed so it can be swapped to use `gonum.org/v1/gonum/spatial/kdtree` under the hood later for sub-linear queries on large datasets.
- IDs are optional but recommended for O(1)-style deletes; keep them unique per tree.

View file

@ -122,4 +122,5 @@ func main() {
## Next Steps ## Next Steps
- Check out the [API Reference](api.md) for detailed documentation - Check out the [API Reference](api.md) for detailed documentation
- Try the example: [Find the best (lowest-ping) DHT peer](dht-best-ping.md)
- Read about the [License](license.md) - Read about the [License](license.md)

View file

@ -47,3 +47,8 @@ This project is licensed under the European Union Public Licence v1.2 (EUPL-1.2)
## Contributing ## Contributing
Contributions are welcome! Please feel free to submit a Pull Request. Contributions are welcome! Please feel free to submit a Pull Request.
## Examples
- Find the best (lowest-ping) DHT peer using KDTree: [Best Ping Peer (DHT)](dht-best-ping.md)

240
kdtree.go Normal file
View file

@ -0,0 +1,240 @@
package poindexter
import (
"errors"
"math"
"sort"
)
// KDPoint represents a point with coordinates and an attached payload/value.
// ID should be unique within a tree to enable O(1) deletes by ID.
// Coords must all have the same dimensionality within a given KDTree.
//
// An empty ID is accepted by the tree, but such a point is not indexed and
// therefore cannot be removed with DeleteByID.
type KDPoint[T any] struct {
// ID is the optional lookup key; the tree rejects duplicate non-empty IDs.
ID string
// Coords is the point's position; its length must equal the tree's Dim.
Coords []float64
// Value is the caller's payload, returned unchanged by queries.
Value T
}
// DistanceMetric defines a metric over R^n.
//
// KDTree always calls Distance with the query as a and a stored point's
// coordinates as b, after checking that both have the tree's dimensionality.
// Query ordering compares the returned values with <, so smaller means closer.
type DistanceMetric interface {
// Distance returns the distance between points a and b.
Distance(a, b []float64) float64
}
// EuclideanDistance is the L2 (straight-line) metric.
type EuclideanDistance struct{}

// Distance returns the square root of the sum of squared per-coordinate
// differences between a and b.
//
// Only the first len(a) coordinates of b are read; the call panics if b is
// shorter than a.
func (EuclideanDistance) Distance(a, b []float64) float64 {
	sumSq := 0.0
	for i, av := range a {
		delta := av - b[i]
		sumSq += delta * delta
	}
	return math.Sqrt(sumSq)
}
// ManhattanDistance is the L1 (taxicab) metric.
type ManhattanDistance struct{}

// Distance returns the sum of absolute per-coordinate differences between
// a and b.
//
// Only the first len(a) coordinates of b are read; the call panics if b is
// shorter than a.
func (ManhattanDistance) Distance(a, b []float64) float64 {
	total := 0.0
	for i, av := range a {
		// Subtract the smaller from the larger instead of negating afterwards.
		if bv := b[i]; av < bv {
			total += bv - av
		} else {
			total += av - bv
		}
	}
	return total
}
// ChebyshevDistance is the L-infinity metric: the largest single-coordinate gap.
type ChebyshevDistance struct{}

// Distance returns the maximum absolute per-coordinate difference between
// a and b, or 0 when a is empty.
//
// Only the first len(a) coordinates of b are read; the call panics if b is
// shorter than a.
func (ChebyshevDistance) Distance(a, b []float64) float64 {
	worst := 0.0
	for i, av := range a {
		gap := av - b[i]
		// worst never drops below zero, so at most one of gap / -gap can beat it.
		switch {
		case gap > worst:
			worst = gap
		case -gap > worst:
			worst = -gap
		}
	}
	return worst
}
// KDOption is a functional option applied while a KDTree is being built.
// It is deliberately non-generic so that callers of NewKDTree do not have to
// spell out the payload type parameter.
type KDOption func(*kdOptions)

// kdOptions collects the settings that KDOption values may change.
type kdOptions struct {
	metric DistanceMetric
}

// WithMetric returns an option that makes the tree measure distances with m.
func WithMetric(m DistanceMetric) KDOption {
	return func(cfg *kdOptions) {
		cfg.metric = m
	}
}
// KDTree is a lightweight wrapper providing nearest-neighbor operations.
// Note: This implementation currently uses linear scans for queries
// and is designed to be easily swappable with gonum.org/v1/gonum/spatial/kdtree
// in the future without breaking the public API.
type KDTree[T any] struct {
// points holds every stored point in a flat slice; order is not preserved
// across deletes because DeleteByID moves the last element into the gap.
points []KDPoint[T]
// dim is the coordinate count every point and query must have.
dim int
// metric computes the distance between a query and a stored point.
metric DistanceMetric
// idIndex maps each non-empty point ID to its position in points.
idIndex map[string]int
}
// NewKDTree builds a KDTree from the given points.
//
// All points must have the same dimensionality (>0), taken from the first
// point, and non-empty IDs must be unique. The metric defaults to
// EuclideanDistance; nil options and a nil metric (e.g. WithMetric(nil)) are
// ignored so that a misconfigured call cannot produce a tree that panics on
// its first query.
//
// The point structs are copied into the tree, but each Coords slice still
// shares its backing array with the caller's slice.
//
// It returns an error when pts is empty, when the first point has no
// coordinates, when dimensionalities differ, or when a non-empty ID repeats.
func NewKDTree[T any](pts []KDPoint[T], opts ...KDOption) (*KDTree[T], error) {
	if len(pts) == 0 {
		return nil, errors.New("no points provided")
	}
	dim := len(pts[0].Coords)
	if dim == 0 {
		return nil, errors.New("points must have at least one dimension")
	}

	idIndex := make(map[string]int, len(pts))
	for i := range pts {
		p := &pts[i] // index instead of ranging by value to avoid copying each point
		if len(p.Coords) != dim {
			return nil, errors.New("inconsistent dimensionality in points")
		}
		if p.ID == "" {
			continue // points without an ID are stored but not indexed
		}
		if _, exists := idIndex[p.ID]; exists {
			return nil, errors.New("duplicate point ID: " + p.ID)
		}
		idIndex[p.ID] = i
	}

	cfg := kdOptions{metric: EuclideanDistance{}}
	for _, o := range opts {
		if o == nil {
			continue // calling a nil option would panic
		}
		o(&cfg)
	}
	if cfg.metric == nil {
		// WithMetric(nil) would otherwise make every query panic.
		cfg.metric = EuclideanDistance{}
	}

	return &KDTree[T]{
		points:  append([]KDPoint[T](nil), pts...),
		dim:     dim,
		metric:  cfg.metric,
		idIndex: idIndex,
	}, nil
}
// Dim reports how many coordinates every point and query in this tree has.
func (t *KDTree[T]) Dim() int {
	return t.dim
}
// Len reports how many points are currently stored in the tree.
func (t *KDTree[T]) Len() int {
	n := len(t.points)
	return n
}
// Nearest returns the closest point to the query, along with its distance.
//
// ok is false if the tree is empty, if len(query) differs from Dim, or if the
// metric returns NaN for every stored point. When several points are equally
// close, the one stored earliest wins.
//
// A point whose distance is +Inf (for example after floating-point overflow
// in the metric) is still a valid answer when nothing closer exists, which
// keeps Nearest consistent with KNearest.
func (t *KDTree[T]) Nearest(query []float64) (KDPoint[T], float64, bool) {
	if len(query) != t.dim || t.Len() == 0 {
		return KDPoint[T]{}, 0, false
	}

	bestIdx := -1
	var bestDist float64
	for i := range t.points {
		d := t.metric.Distance(query, t.points[i].Coords)
		if math.IsNaN(d) {
			continue // NaN is unordered, so it can never be "closest"
		}
		// Accept the first comparable candidate unconditionally rather than
		// comparing against a finite sentinel that +Inf can never beat.
		if bestIdx < 0 || d < bestDist {
			bestIdx, bestDist = i, d
		}
	}
	if bestIdx < 0 {
		return KDPoint[T]{}, 0, false
	}
	return t.points[bestIdx], bestDist, true
}
// KNearest returns up to k points closest to the query, nearest first,
// together with their distances in the same order.
//
// It returns nil, nil when k <= 0, when len(query) differs from Dim, or when
// the tree is empty. If k exceeds Len, every point is returned.
func (t *KDTree[T]) KNearest(query []float64, k int) ([]KDPoint[T], []float64) {
	n := t.Len()
	if k <= 0 || len(query) != t.dim || n == 0 {
		return nil, nil
	}

	// distTo[i] is the distance to t.points[i]; order is the permutation of
	// point indices that gets sorted by that distance.
	distTo := make([]float64, n)
	order := make([]int, n)
	for i := range t.points {
		order[i] = i
		distTo[i] = t.metric.Distance(query, t.points[i].Coords)
	}
	sort.Slice(order, func(a, b int) bool {
		return distTo[order[a]] < distTo[order[b]]
	})

	if k > n {
		k = n
	}
	neighbors := make([]KDPoint[T], k)
	dists := make([]float64, k)
	for rank, idx := range order[:k] {
		neighbors[rank] = t.points[idx]
		dists[rank] = distTo[idx]
	}
	return neighbors, dists
}
// Radius returns every point whose distance from the query is at most r,
// nearest first, together with the matching distances.
//
// It returns nil, nil when r is negative, when len(query) differs from Dim,
// or when the tree is empty. A valid query that matches nothing yields empty,
// non-nil slices.
func (t *KDTree[T]) Radius(query []float64, r float64) ([]KDPoint[T], []float64) {
	if r < 0 || len(query) != t.dim || t.Len() == 0 {
		return nil, nil
	}

	// hitIdx and hitDist are parallel: the point index and distance of each match.
	var (
		hitIdx  []int
		hitDist []float64
	)
	for i := range t.points {
		d := t.metric.Distance(query, t.points[i].Coords)
		if d > r {
			continue
		}
		if d <= r { // re-check so an unordered (NaN) distance is excluded
			hitIdx = append(hitIdx, i)
			hitDist = append(hitDist, d)
		}
	}

	// Sort a permutation of match positions instead of the matches themselves.
	order := make([]int, len(hitIdx))
	for pos := range order {
		order[pos] = pos
	}
	sort.Slice(order, func(a, b int) bool {
		return hitDist[order[a]] < hitDist[order[b]]
	})

	neighbors := make([]KDPoint[T], len(order))
	dists := make([]float64, len(order))
	for rank, pos := range order {
		neighbors[rank] = t.points[hitIdx[pos]]
		dists[rank] = hitDist[pos]
	}
	return neighbors, dists
}
// Insert appends p to the tree and reports whether it was stored.
//
// The point is rejected (false) when its coordinate count differs from Dim
// or when its non-empty ID is already present. Points with an empty ID are
// stored without being indexed.
func (t *KDTree[T]) Insert(p KDPoint[T]) bool {
	if len(p.Coords) != t.dim {
		return false
	}

	indexed := p.ID != ""
	if indexed {
		if _, taken := t.idIndex[p.ID]; taken {
			return false
		}
	}

	// The new point lands at the current end of the slice.
	pos := len(t.points)
	t.points = append(t.points, p)
	if indexed {
		t.idIndex[p.ID] = pos
	}
	return true
}
// DeleteByID removes a point by its ID. Returns false if not found or ID empty.
//
// Removal is a swap-delete: the last stored point is moved into the vacated
// position, so the internal storage order is not preserved. The vacated tail
// slot is cleared before truncating so that the removed point's payload and
// coordinate slice are not kept alive by the backing array.
func (t *KDTree[T]) DeleteByID(id string) bool {
	if id == "" {
		return false
	}
	idx, ok := t.idIndex[id]
	if !ok {
		return false
	}

	last := len(t.points) - 1
	if idx != last {
		// Fill the gap with the final point and repoint its index entry.
		moved := t.points[last]
		t.points[idx] = moved
		if moved.ID != "" {
			t.idIndex[moved.ID] = idx
		}
	}
	// Zero the tail slot; truncation alone would leave its references reachable.
	t.points[last] = KDPoint[T]{}
	t.points = t.points[:last]
	delete(t.idIndex, id)
	return true
}

109
kdtree_test.go Normal file
View file

@ -0,0 +1,109 @@
package poindexter
import (
"testing"
)
// samplePoints returns a fresh 2D fixture: the four corners of the unit
// square (A-D) plus one outlier (E) at (2, 2).
func samplePoints() []KDPoint[string] {
	pts := make([]KDPoint[string], 0, 5)
	pts = append(pts,
		KDPoint[string]{ID: "A", Coords: []float64{0, 0}, Value: "alpha"},
		KDPoint[string]{ID: "B", Coords: []float64{1, 0}, Value: "bravo"},
		KDPoint[string]{ID: "C", Coords: []float64{0, 1}, Value: "charlie"},
		KDPoint[string]{ID: "D", Coords: []float64{1, 1}, Value: "delta"},
		KDPoint[string]{ID: "E", Coords: []float64{2, 2}, Value: "echo"},
	)
	return pts
}
// TestKDTree_Nearest checks that the Euclidean nearest neighbor of (0.9, 0.9)
// is D at (1, 1) with the expected distance, and that a query of the wrong
// dimensionality is rejected.
func TestKDTree_Nearest(t *testing.T) {
	pts := samplePoints()
	tree, err := NewKDTree(pts, WithMetric(EuclideanDistance{}))
	if err != nil {
		t.Fatalf("NewKDTree error: %v", err)
	}

	p, dist, ok := tree.Nearest([]float64{0.9, 0.9})
	if !ok {
		t.Fatalf("expected a nearest neighbor")
	}
	if p.ID != "D" {
		t.Fatalf("expected D, got %s", p.ID)
	}
	// The squared distance is 0.1² + 0.1² = 0.02. Comparing squares with a
	// tolerance pins the value without needing math.Sqrt in the test.
	if diff := dist*dist - 0.02; diff < -1e-9 || diff > 1e-9 {
		t.Fatalf("expected distance ~0.1414 (sqrt(0.02)), got %v", dist)
	}

	// A 1D query against a 2D tree must not produce a result.
	if _, _, ok := tree.Nearest([]float64{0.9}); ok {
		t.Fatalf("expected no result for a query with the wrong dimensionality")
	}
}
// TestKDTree_KNearest checks the k-NN result size, ordering, clamping of k to
// the tree size, and rejection of a non-positive k under the Manhattan metric.
func TestKDTree_KNearest(t *testing.T) {
	pts := samplePoints()
	tree, err := NewKDTree(pts, WithMetric(ManhattanDistance{}))
	if err != nil {
		t.Fatalf("NewKDTree error: %v", err)
	}
	query := []float64{0.9, 0.9}

	neighbors, dists := tree.KNearest(query, 3)
	if len(neighbors) != 3 || len(dists) != 3 {
		t.Fatalf("expected 3 neighbors, got %d", len(neighbors))
	}
	if neighbors[0].ID != "D" {
		t.Fatalf("expected first neighbor D, got %s", neighbors[0].ID)
	}
	// Results are documented as ascending by distance.
	for i := 1; i < len(dists); i++ {
		if dists[i] < dists[i-1] {
			t.Fatalf("distances not sorted: %v", dists)
		}
	}

	// Asking for more neighbors than exist returns every point, not an error.
	all, allDists := tree.KNearest(query, len(pts)+5)
	if len(all) != len(pts) || len(allDists) != len(pts) {
		t.Fatalf("expected k to clamp to %d, got %d neighbors and %d distances", len(pts), len(all), len(allDists))
	}

	// A non-positive k yields no result at all.
	if none, noneDists := tree.KNearest(query, 0); none != nil || noneDists != nil {
		t.Fatalf("expected nil results for k=0, got %v and %v", none, noneDists)
	}
}
// TestKDTree_Radius checks that a Euclidean radius query around the origin
// returns exactly the points inside the radius, nearest first.
func TestKDTree_Radius(t *testing.T) {
	pts := samplePoints()
	tree, err := NewKDTree(pts, WithMetric(EuclideanDistance{}))
	if err != nil {
		t.Fatalf("NewKDTree error: %v", err)
	}

	const r = 1.01
	neighbors, dists := tree.Radius([]float64{0, 0}, r)
	// A (distance 0), B and C (distance 1) qualify; D (~1.414) and E do not.
	if len(neighbors) != 3 || len(dists) != 3 {
		t.Fatalf("expected 3 neighbors within radius, got %d", len(neighbors))
	}
	if neighbors[0].ID != "A" || dists[0] != 0 {
		t.Fatalf("expected A at distance 0 first, got %s at %v", neighbors[0].ID, dists[0])
	}
	for i, d := range dists {
		if d > r {
			t.Fatalf("neighbor %s at distance %v is outside radius %v", neighbors[i].ID, d, r)
		}
		// distances should be non-decreasing
		if i > 0 && d < dists[i-1] {
			t.Fatalf("distances not sorted: %v", dists)
		}
	}

	// A negative radius is rejected outright.
	if none, noneDists := tree.Radius([]float64{0, 0}, -1); none != nil || noneDists != nil {
		t.Fatalf("expected nil results for negative radius, got %v and %v", none, noneDists)
	}
}
func TestKDTree_InsertDelete(t *testing.T) {
pts := samplePoints()
tree, err := NewKDTree(pts)
if err != nil {
t.Fatalf("NewKDTree error: %v", err)
}
// Insert a new close point near (0,0)
ok := tree.Insert(KDPoint[string]{ID: "Z", Coords: []float64{0.05, 0.05}, Value: "zulu"})
if !ok {
t.Fatalf("insert failed")
}
p, _, found := tree.Nearest([]float64{0.04, 0.04})
if !found || p.ID != "Z" {
t.Fatalf("expected nearest to be Z after insert, got %+v", p)
}
// Delete and verify nearest changes back
if !tree.DeleteByID("Z") {
t.Fatalf("delete failed")
}
p, _, found = tree.Nearest([]float64{0.04, 0.04})
if !found || p.ID != "A" {
t.Fatalf("expected nearest to be A after delete, got %+v", p)
}
}
// TestKDTree_DimAndLen verifies that a freshly built tree reports the size
// and dimensionality of the points it was built from.
func TestKDTree_DimAndLen(t *testing.T) {
	pts := samplePoints()
	tree, err := NewKDTree(pts)
	if err != nil {
		t.Fatalf("NewKDTree error: %v", err)
	}

	if gotLen, wantLen := tree.Len(), len(pts); gotLen != wantLen {
		t.Fatalf("Len mismatch: %d vs %d", gotLen, wantLen)
	}
	if gotDim := tree.Dim(); gotDim != 2 {
		t.Fatalf("Dim mismatch: %d", gotDim)
	}
}

View file

@ -55,6 +55,8 @@ markdown_extensions:
nav: nav:
- Home: index.md - Home: index.md
- Getting Started: getting-started.md - Getting Started: getting-started.md
- Examples:
- Best Ping Peer (DHT): dht-best-ping.md
- API Reference: api.md - API Reference: api.md
- License: license.md - License: license.md

View file

@ -3,7 +3,7 @@ package poindexter
// Version returns the current version of the library. // Version returns the current version of the library.
func Version() string { func Version() string {
return "0.1.0" return "0.2.0"
} }
// Hello returns a greeting message. // Hello returns a greeting message.