From dad508de31d94fcfd490737fed62bddd0725a52b Mon Sep 17 00:00:00 2001 From: Snider Date: Mon, 3 Nov 2025 18:19:12 +0000 Subject: [PATCH] Add lint configuration, benchmarks, and documentation for KDTree --- .github/workflows/ci.yml | 33 +++-- .golangci.yml | 20 ++++ CHANGELOG.md | 27 +++++ CODE_OF_CONDUCT.md | 9 ++ CONTRIBUTING.md | 59 +++++++++ README.md | 17 +++ SECURITY.md | 20 ++++ bench_kdtree_test.go | 69 +++++++++++ docs/api.md | 37 ++++++ examples/dht_ping_1d/main.go | 41 +++++++ examples/kdtree_2d_ping_hop/main.go | 34 ++++++ examples/kdtree_3d_ping_hop_geo/main.go | 36 ++++++ examples/kdtree_4d_ping_hop_geo_score/main.go | 38 ++++++ examples_test.go | 87 +++++++++++++- fuzz_kdtree_test.go | 113 ++++++++++++++++++ kdtree.go | 12 +- 16 files changed, 639 insertions(+), 13 deletions(-) create mode 100644 .golangci.yml create mode 100644 CHANGELOG.md create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 SECURITY.md create mode 100644 bench_kdtree_test.go create mode 100644 examples/dht_ping_1d/main.go create mode 100644 examples/kdtree_2d_ping_hop/main.go create mode 100644 examples/kdtree_3d_ping_hop_geo/main.go create mode 100644 examples/kdtree_4d_ping_hop_geo_score/main.go create mode 100644 fuzz_kdtree_test.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39f369f..c737b86 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,9 +4,11 @@ on: push: branches: [ main, master ] pull_request: + branches: [ main, master ] jobs: - build-test: + build: + name: Build & Test runs-on: ubuntu-latest strategy: matrix: @@ -15,12 +17,15 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Setup Go + - name: Setup Go ${{ matrix.go-version }} uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} - - name: Verify go.mod is tidy + - name: Go env + run: go env + + - name: Tidy check run: | go mod tidy git diff --exit-code -- go.mod go.sum @@ -28,15 +33,25 @@ jobs: - 
name: Build run: go build ./... - - name: Test (race) - run: go test -race ./... - - name: Vet run: go vet ./... - - name: Govulncheck + - name: Test (race) + run: go test -race ./... + + - name: Build examples + run: | + if [ -d examples ]; then + go build ./examples/... + fi + + - name: Vulncheck uses: golang/govulncheck-action@v1 with: go-version-input: ${{ matrix.go-version }} - # Run against packages in the module - args: ./... + + - name: Lint + uses: golangci/golangci-lint-action@v6 + with: + version: latest + args: --timeout=5m diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..abcbe10 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,20 @@ +run: + timeout: 5m +linters: + enable: + - govet + - gosimple + - staticcheck + - ineffassign + - gofmt + - revive +issues: + exclude-use-default: false + max-issues-per-linter: 0 + max-same-issues: 0 +linters-settings: + revive: + severity: warning + rules: + - name: exported + disabled: true # keep comments pragmatic; we have pkg docs and key API docs diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..23b0de5 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,27 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on Keep a Changelog and this project adheres to Semantic Versioning. + +## [Unreleased] +### Added +- README badges (pkg.go.dev, CI, Go Report Card, govulncheck) and KDTree performance/concurrency notes. +- Examples directory with runnable programs: 1D ping, 2D ping+hop, 3D ping+hop+geo, 4D ping+hop+geo+score. +- CI workflow (Go 1.22/1.23): tidy check, build, vet, test -race, build examples, govulncheck, golangci-lint. +- Lint configuration (.golangci.yml) with a pragmatic ruleset. +- Contributor docs: CONTRIBUTING.md, CODE_OF_CONDUCT.md, SECURITY.md. +- pkg.go.dev example functions for KDTree usage and helpers. +- Fuzz tests and benchmarks for KDTree (Nearest/KNearest/Radius and metrics).
+ +### Changed +- Documented KDTree complexity and tie-ordering in code comments. +- Docs: API examples synced to Version 0.2.0; added references to helpers and examples. + +## [0.2.0] - 2025-10-?? +### Added +- KDTree public API with generic payloads and helper builders (Build2D/3D/4D). +- Docs pages for DHT examples and multi-dimensional KDTree usage. + +[Unreleased]: https://github.com/Snider/Poindexter/compare/v0.2.0...HEAD +[0.2.0]: https://github.com/Snider/Poindexter/releases/tag/v0.2.0 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..58dcd62 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,9 @@ +# Code of Conduct + +This project has adopted the Contributor Covenant Code of Conduct. + +- Version: https://www.contributor-covenant.org/version/2/1/code_of_conduct/ +- FAQ: https://www.contributor-covenant.org/faq +- Translations: https://www.contributor-covenant.org/translations + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the maintainers via GitHub issues or by email listed on the repository profile. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..1d2a4f8 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,59 @@ +# Contributing to Poindexter + +Thanks for your interest in contributing! This document describes how to build, test, lint, and propose changes. + +## Getting started + +- Go 1.22+ (1.23 preferred) +- `git clone https://github.com/Snider/Poindexter` +- `cd Poindexter` + +## Build and test + +- Tidy deps: `go mod tidy` +- Build: `go build ./...` +- Run tests: `go test ./...` +- Run race tests: `go test -race ./...` +- Run an example: `go run ./examples/dht_ping_1d` (`go run` accepts one package at a time) + +## Lint and vet + +We use golangci-lint in CI. 
To run locally: + +``` +# Install once +curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin + +# Run +golangci-lint run +``` + +Also run `go vet ./...` periodically. + +## Fuzzing and benchmarks + +- Fuzz (manually, one target per run): `go test -run=NONE -fuzz=FuzzKDTreeNearest_NoPanic -fuzztime=10s` +- Benchmarks: `go test -bench=. -benchmem` + +## Pull requests + +- Create a branch from `main`. +- Ensure `go mod tidy` produces no changes. +- Ensure `go test -race ./...` passes. +- Ensure `golangci-lint run` has no issues. +- Update CHANGELOG.md (Unreleased section) with a brief summary. + +## Coding style + +- Follow standard Go formatting and idioms. +- Public APIs must have doc comments starting with the identifier name and should be concise. +- Avoid breaking changes in minor versions; use SemVer. + +## Release process + +Maintainers: +- Update CHANGELOG.md. +- Tag releases `vX.Y.Z`. +- Consider updating docs and README badges if needed. + +Thanks for helping improve Poindexter! \ No newline at end of file diff --git a/README.md b/README.md index 4ab8321..e82c489 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,10 @@ # Poindexter +[![Go Reference](https://pkg.go.dev/badge/github.com/Snider/Poindexter.svg)](https://pkg.go.dev/github.com/Snider/Poindexter) +[![CI](https://github.com/Snider/Poindexter/actions/workflows/ci.yml/badge.svg)](https://github.com/Snider/Poindexter/actions) +[![Go Report Card](https://goreportcard.com/badge/github.com/Snider/Poindexter)](https://goreportcard.com/report/github.com/Snider/Poindexter) +[![Vulncheck](https://img.shields.io/badge/govulncheck-enabled-brightgreen.svg)](https://pkg.go.dev/golang.org/x/vuln/cmd/govulncheck) + A Go library package providing utility functions including sorting algorithms with custom comparators. 
## Features @@ -59,6 +64,18 @@ func main() { Full documentation is available at [https://snider.github.io/Poindexter/](https://snider.github.io/Poindexter/) +Explore runnable examples in the repository: +- examples/dht_ping_1d +- examples/kdtree_2d_ping_hop +- examples/kdtree_3d_ping_hop_geo +- examples/kdtree_4d_ping_hop_geo_score + +### KDTree performance and notes +- Current KDTree queries are O(n) linear scans, which are great for small-to-medium datasets or low-latency prototyping. For 1e5+ points and low/medium dimensions, consider swapping the internal engine to `gonum.org/v1/gonum/spatial/kdtree` (the API here is compatible by design). +- Insert is O(1) amortized; delete by ID is O(1) via swap-delete; order is not preserved. +- Concurrency: the KDTree type is not safe for concurrent mutation. Protect with a mutex or share immutable snapshots for read-mostly workloads. +- See multi-dimensional examples (ping/hops/geo/score) in docs and `examples/`. + ## License This project is licensed under the European Union Public Licence v1.2 (EUPL-1.2). See [LICENSE](LICENSE) for details. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..39a707c --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,20 @@ +# Security Policy + +## Supported Versions + +We support the latest minor release series. Please use the most recent tagged version. + +## Reporting a Vulnerability + +If you believe you have found a security vulnerability in Poindexter: + +- Please DO NOT open a public GitHub issue. +- Email the maintainer listed on the repository profile with: + - A description of the issue and its impact + - Steps to reproduce (a minimal proof-of-concept if possible) + - Affected versions/commit hashes +- We will acknowledge receipt within 5 business days and work with you on a fix and coordinated disclosure. + +## Dependencies + +We run `govulncheck` in CI. If you see alerts or advisories that affect Poindexter, please include links or CVE identifiers in your report. 
diff --git a/bench_kdtree_test.go b/bench_kdtree_test.go new file mode 100644 index 0000000..435bf5c --- /dev/null +++ b/bench_kdtree_test.go @@ -0,0 +1,69 @@ +package poindexter + +import ( + "fmt" + "math/rand" + "testing" +) + +func makePoints(n, dim int) []KDPoint[int] { + pts := make([]KDPoint[int], n) + for i := 0; i < n; i++ { + coords := make([]float64, dim) + for d := 0; d < dim; d++ { + coords[d] = rand.Float64() + } + pts[i] = KDPoint[int]{ID: fmt.Sprint(i), Coords: coords, Value: i} + } + return pts +} + +func benchNearest(b *testing.B, n, dim int) { + pts := makePoints(n, dim) + tr, _ := NewKDTree(pts) + q := make([]float64, dim) + for i := range q { + q[i] = 0.5 + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _, _ = tr.Nearest(q) + } +} + +func benchKNearest(b *testing.B, n, dim, k int) { + pts := makePoints(n, dim) + tr, _ := NewKDTree(pts) + q := make([]float64, dim) + for i := range q { + q[i] = 0.5 + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = tr.KNearest(q, k) + } +} + +func benchRadius(b *testing.B, n, dim int, r float64) { + pts := makePoints(n, dim) + tr, _ := NewKDTree(pts) + q := make([]float64, dim) + for i := range q { + q[i] = 0.5 + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = tr.Radius(q, r) + } +} + +func BenchmarkNearest_1k_2D(b *testing.B) { benchNearest(b, 1_000, 2) } +func BenchmarkNearest_10k_2D(b *testing.B) { benchNearest(b, 10_000, 2) } +func BenchmarkNearest_1k_4D(b *testing.B) { benchNearest(b, 1_000, 4) } +func BenchmarkNearest_10k_4D(b *testing.B) { benchNearest(b, 10_000, 4) } + +func BenchmarkKNearest10_1k_2D(b *testing.B) { benchKNearest(b, 1_000, 2, 10) } +func BenchmarkKNearest10_10k_2D(b *testing.B) { benchKNearest(b, 10_000, 2, 10) } + +func BenchmarkRadiusMid_1k_2D(b *testing.B) { benchRadius(b, 1_000, 2, 0.5) } +func BenchmarkRadiusMid_10k_2D(b *testing.B) { benchRadius(b, 10_000, 2, 0.5) } diff --git a/docs/api.md b/docs/api.md index 658d60f..1de135e 100644 --- a/docs/api.md +++ 
b/docs/api.md @@ -373,3 +373,40 @@ best, dist, _ := kdt.Nearest([]float64{0, 0, 0, 0}) Notes: - Keep and reuse your normalization parameters (min/max) if you need consistency across updates; otherwise rebuild points when the candidate set changes. - Use `invert` to turn “higher is better” features (like scores) into lower costs for distance calculations. + + +--- + +## KDTree Constructors and Errors + +### NewKDTree + +```go +func NewKDTree[T any](pts []KDPoint[T], opts ...KDOption) (*KDTree[T], error) +``` + +Build a KDTree from the provided points. All points must have the same dimensionality (> 0) and IDs (if provided) must be unique. + +Possible errors: +- `ErrEmptyPoints`: no points provided +- `ErrZeroDim`: dimension must be at least 1 +- `ErrDimMismatch`: inconsistent dimensionality among points +- `ErrDuplicateID`: duplicate point ID encountered + +### NewKDTreeFromDim + +```go +func NewKDTreeFromDim[T any](dim int, opts ...KDOption) (*KDTree[T], error) +``` + +Construct an empty KDTree with the given dimension, then populate later via `Insert`. + +--- + +## KDTree Notes: Complexity, Ties, Concurrency + +- Complexity: current implementation uses O(n) linear scans for queries (`Nearest`, `KNearest`, `Radius`). Inserts are O(1) amortized. Deletes by ID are O(1) using swap-delete (order not preserved). +- Tie ordering: when multiple neighbors have the same distance, ordering of ties is arbitrary and not stable between calls. +- Concurrency: KDTree is not safe for concurrent mutation. Wrap with a mutex or share immutable snapshots for read-mostly workloads. + +See runnable examples in the repository `examples/` and the docs pages for 1D DHT and multi-dimensional KDTree usage. 
diff --git a/examples/dht_ping_1d/main.go b/examples/dht_ping_1d/main.go new file mode 100644 index 0000000..a45acb6 --- /dev/null +++ b/examples/dht_ping_1d/main.go @@ -0,0 +1,41 @@ +package main + +import ( + "fmt" + poindexter "github.com/Snider/Poindexter" +) + +type Peer struct { + Addr string + Ping int +} + +func main() { + // Toy DHT routing table + table := []Peer{ + {Addr: "peer1.example:4001", Ping: 74}, + {Addr: "peer2.example:4001", Ping: 52}, + {Addr: "peer3.example:4001", Ping: 110}, + {Addr: "peer4.example:4001", Ping: 35}, + {Addr: "peer5.example:4001", Ping: 60}, + {Addr: "peer6.example:4001", Ping: 44}, + } + pts := make([]poindexter.KDPoint[Peer], 0, len(table)) + for i, p := range table { + pts = append(pts, poindexter.KDPoint[Peer]{ + ID: fmt.Sprintf("peer-%d", i+1), + Coords: []float64{float64(p.Ping)}, + Value: p, + }) + } + kdt, err := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{})) + if err != nil { + panic(err) + } + best, d, ok := kdt.Nearest([]float64{0}) + if !ok { + fmt.Println("no peers found") + return + } + fmt.Printf("Best peer: %s (ping=%d ms), distance=%.0f\n", best.Value.Addr, best.Value.Ping, d) +} diff --git a/examples/kdtree_2d_ping_hop/main.go b/examples/kdtree_2d_ping_hop/main.go new file mode 100644 index 0000000..200318e --- /dev/null +++ b/examples/kdtree_2d_ping_hop/main.go @@ -0,0 +1,34 @@ +package main + +import ( + "fmt" + poindexter "github.com/Snider/Poindexter" +) + +type Peer2 struct { + ID string + PingMS float64 + Hops float64 +} + +func main() { + peers := []Peer2{ + {ID: "A", PingMS: 22, Hops: 3}, + {ID: "B", PingMS: 34, Hops: 2}, + {ID: "C", PingMS: 15, Hops: 4}, + {ID: "D", PingMS: 55, Hops: 1}, + {ID: "E", PingMS: 18, Hops: 2}, + } + weights := [2]float64{1.0, 1.0} + invert := [2]bool{false, false} + pts, _ := poindexter.Build2D( + peers, + func(p Peer2) string { return p.ID }, + func(p Peer2) float64 { return p.PingMS }, + func(p Peer2) float64 { return p.Hops }, + 
weights, invert, + ) + tr, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.ManhattanDistance{})) + best, _, _ := tr.Nearest([]float64{0, 0.3}) + fmt.Println("2D best:", best.ID) +} diff --git a/examples/kdtree_3d_ping_hop_geo/main.go b/examples/kdtree_3d_ping_hop_geo/main.go new file mode 100644 index 0000000..f844b29 --- /dev/null +++ b/examples/kdtree_3d_ping_hop_geo/main.go @@ -0,0 +1,36 @@ +package main + +import ( + "fmt" + poindexter "github.com/Snider/Poindexter" +) + +type Peer3 struct { + ID string + PingMS float64 + Hops float64 + GeoKM float64 +} + +func main() { + peers := []Peer3{ + {ID: "A", PingMS: 22, Hops: 3, GeoKM: 1200}, + {ID: "B", PingMS: 34, Hops: 2, GeoKM: 800}, + {ID: "C", PingMS: 15, Hops: 4, GeoKM: 4500}, + {ID: "D", PingMS: 55, Hops: 1, GeoKM: 300}, + {ID: "E", PingMS: 18, Hops: 2, GeoKM: 2200}, + } + weights := [3]float64{1.0, 0.7, 0.3} + invert := [3]bool{false, false, false} + pts, _ := poindexter.Build3D( + peers, + func(p Peer3) string { return p.ID }, + func(p Peer3) float64 { return p.PingMS }, + func(p Peer3) float64 { return p.Hops }, + func(p Peer3) float64 { return p.GeoKM }, + weights, invert, + ) + tr, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{})) + best, _, _ := tr.Nearest([]float64{0, weights[1] * 0.2, weights[2] * 0.4}) + fmt.Println("3D best:", best.ID) +} diff --git a/examples/kdtree_4d_ping_hop_geo_score/main.go b/examples/kdtree_4d_ping_hop_geo_score/main.go new file mode 100644 index 0000000..89180ca --- /dev/null +++ b/examples/kdtree_4d_ping_hop_geo_score/main.go @@ -0,0 +1,38 @@ +package main + +import ( + "fmt" + poindexter "github.com/Snider/Poindexter" +) + +type Peer4 struct { + ID string + PingMS float64 + Hops float64 + GeoKM float64 + Score float64 +} + +func main() { + peers := []Peer4{ + {ID: "A", PingMS: 22, Hops: 3, GeoKM: 1200, Score: 0.86}, + {ID: "B", PingMS: 34, Hops: 2, GeoKM: 800, Score: 0.91}, + {ID: "C", PingMS: 15, Hops: 4, GeoKM: 4500, 
Score: 0.70}, + {ID: "D", PingMS: 55, Hops: 1, GeoKM: 300, Score: 0.95}, + {ID: "E", PingMS: 18, Hops: 2, GeoKM: 2200, Score: 0.80}, + } + weights := [4]float64{1.0, 0.7, 0.2, 1.2} + invert := [4]bool{false, false, false, true} + pts, _ := poindexter.Build4D( + peers, + func(p Peer4) string { return p.ID }, + func(p Peer4) float64 { return p.PingMS }, + func(p Peer4) float64 { return p.Hops }, + func(p Peer4) float64 { return p.GeoKM }, + func(p Peer4) float64 { return p.Score }, + weights, invert, + ) + tr, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{})) + best, _, _ := tr.Nearest([]float64{0, weights[1] * 0.2, weights[2] * 0.3, 0}) + fmt.Println("4D best:", best.ID) +} diff --git a/examples_test.go b/examples_test.go index 9468001..5ef41b7 100644 --- a/examples_test.go +++ b/examples_test.go @@ -30,7 +30,92 @@ func ExampleBuild2D() { ) tr, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.ManhattanDistance{})) _, _, _ = tr.Nearest([]float64{0, 0}) - // Querying the origin (0,0) in normalized space tends to favor minima on each axis. 
fmt.Printf("dim=%d len=%d", tr.Dim(), tr.Len()) // Output: dim=2 len=3 } + +func ExampleKDTree_Nearest() { + pts := []poindexter.KDPoint[int]{ + {ID: "x", Coords: []float64{0, 0}, Value: 1}, + {ID: "y", Coords: []float64{2, 0}, Value: 2}, + } + tr, _ := poindexter.NewKDTree(pts, poindexter.WithMetric(poindexter.EuclideanDistance{})) + p, d, ok := tr.Nearest([]float64{1.5, 0}) + fmt.Printf("ok=%v id=%s d=%.1f", ok, p.ID, d) + // Output: ok=true id=y d=0.5 +} + +func ExampleKDTree_KNearest() { + pts := []poindexter.KDPoint[int]{ + {ID: "a", Coords: []float64{0}, Value: 0}, + {ID: "b", Coords: []float64{1}, Value: 0}, + {ID: "c", Coords: []float64{2}, Value: 0}, + } + tr, _ := poindexter.NewKDTree(pts) + ns, ds := tr.KNearest([]float64{0.6}, 2) + fmt.Printf("%s %.1f | %s %.1f", ns[0].ID, ds[0], ns[1].ID, ds[1]) + // Output: b 0.4 | a 0.6 +} + +func ExampleKDTree_Radius() { + pts := []poindexter.KDPoint[int]{ + {ID: "a", Coords: []float64{0}, Value: 0}, + {ID: "b", Coords: []float64{1}, Value: 0}, + {ID: "c", Coords: []float64{2}, Value: 0}, + } + tr, _ := poindexter.NewKDTree(pts) + within, _ := tr.Radius([]float64{0}, 1.0) + fmt.Printf("%d %s %s", len(within), within[0].ID, within[1].ID) + // Output: 2 a b +} + +func ExampleKDTree_insertDeleteByID() { + pts := []poindexter.KDPoint[string]{ + {ID: "A", Coords: []float64{0}, Value: "a"}, + } + tr, _ := poindexter.NewKDTree(pts) + tr.Insert(poindexter.KDPoint[string]{ID: "Z", Coords: []float64{0.1}, Value: "z"}) + p, _, _ := tr.Nearest([]float64{0.09}) + fmt.Println(p.ID) + tr.DeleteByID("Z") + p2, _, _ := tr.Nearest([]float64{0.09}) + fmt.Println(p2.ID) + // Output: + // Z + // A +} + +func ExampleBuild3D() { + type rec struct{ x, y, z float64 } + items := []rec{{0, 0, 0}, {1, 1, 1}} + weights := [3]float64{1, 1, 1} + invert := [3]bool{false, false, false} + pts, _ := poindexter.Build3D(items, + func(r rec) string { return "" }, + func(r rec) float64 { return r.x }, + func(r rec) float64 { return r.y }, + func(r rec) 
float64 { return r.z }, + weights, invert, + ) + tr, _ := poindexter.NewKDTree(pts) + fmt.Println(tr.Dim()) + // Output: 3 +} + +func ExampleBuild4D() { + type rec struct{ a, b, c, d float64 } + items := []rec{{0, 0, 0, 0}, {1, 1, 1, 1}} + weights := [4]float64{1, 1, 1, 1} + invert := [4]bool{false, false, false, false} + pts, _ := poindexter.Build4D(items, + func(r rec) string { return "" }, + func(r rec) float64 { return r.a }, + func(r rec) float64 { return r.b }, + func(r rec) float64 { return r.c }, + func(r rec) float64 { return r.d }, + weights, invert, + ) + tr, _ := poindexter.NewKDTree(pts) + fmt.Println(tr.Dim()) + // Output: 4 +} diff --git a/fuzz_kdtree_test.go b/fuzz_kdtree_test.go new file mode 100644 index 0000000..16d7e57 --- /dev/null +++ b/fuzz_kdtree_test.go @@ -0,0 +1,113 @@ +package poindexter + +import ( + "math/rand" + "testing" +) + +// FuzzKDTreeNearest_NoPanic ensures Nearest never panics and distances are non-negative. +func FuzzKDTreeNearest_NoPanic(f *testing.F) { + // Seed with small cases + f.Add(3, 2) + f.Add(5, 4) + f.Fuzz(func(t *testing.T, n int, dim int) { + if n <= 0 { + n = 1 + } + if n > 64 { + n = 64 + } + if dim <= 0 { + dim = 1 + } + if dim > 8 { + dim = 8 + } + + pts := make([]KDPoint[int], n) + for i := 0; i < n; i++ { + coords := make([]float64, dim) + for d := 0; d < dim; d++ { + coords[d] = rand.Float64()*100 - 50 + } + pts[i] = KDPoint[int]{ID: "", Coords: coords, Value: i} + } + tr, err := NewKDTree(pts) + if err != nil { + t.Skip() + } + q := make([]float64, dim) + for d := range q { + q[d] = rand.Float64()*100 - 50 + } + _, dist, _ := tr.Nearest(q) + if dist < 0 { + t.Fatalf("negative distance: %v", dist) + } + }) +} + +// FuzzMetrics_NoNegative checks Manhattan, Euclidean, Chebyshev don't return negatives for random inputs. 
+func FuzzMetrics_NoNegative(f *testing.F) { + f.Add(2) + f.Add(4) + f.Fuzz(func(t *testing.T, dim int) { + if dim <= 0 { + dim = 1 + } + if dim > 8 { + dim = 8 + } + a := make([]float64, dim) + b := make([]float64, dim) + for i := 0; i < dim; i++ { + a[i] = rand.Float64()*10 - 5 + b[i] = rand.Float64()*10 - 5 + } + m1 := EuclideanDistance{}.Distance(a, b) + m2 := ManhattanDistance{}.Distance(a, b) + m3 := ChebyshevDistance{}.Distance(a, b) + if m1 < 0 || m2 < 0 || m3 < 0 { + t.Fatalf("negative metric: %v %v %v", m1, m2, m3) + } + }) +} + +// FuzzDimensionMismatch_NoPanic ensures queries with wrong dims return ok=false and not panic. +func FuzzDimensionMismatch_NoPanic(f *testing.F) { + f.Add(3, 2, 1) + f.Fuzz(func(t *testing.T, n, dim, qdim int) { + if n <= 0 { + n = 1 + } + if n > 32 { + n = 32 + } + if dim <= 0 { + dim = 1 + } + if dim > 6 { + dim = 6 + } + if qdim < 0 { + qdim = 0 + } + if qdim > 6 { + qdim = 6 + } + pts := make([]KDPoint[int], n) + for i := 0; i < n; i++ { + coords := make([]float64, dim) + pts[i] = KDPoint[int]{Coords: coords} + } + tr, err := NewKDTree(pts) + if err != nil { + t.Skip() + } + q := make([]float64, qdim) + _, _, ok := tr.Nearest(q) + if qdim != dim && ok { + t.Fatalf("expected ok=false for dim mismatch; dim=%d qdim=%d", dim, qdim) + } + }) +} diff --git a/kdtree.go b/kdtree.go index 587047c..76180d2 100644 --- a/kdtree.go +++ b/kdtree.go @@ -86,8 +86,13 @@ type kdOptions struct { func WithMetric(m DistanceMetric) KDOption { return func(o *kdOptions) { o.metric = m } } // KDTree is a lightweight wrapper providing nearest-neighbor operations. -// Note: This implementation currently uses linear scans for queries -// and is designed to be easily swappable with gonum.org/v1/gonum/spatial/kdtree +// +// Complexity: queries are O(n) linear scans in the current implementation. +// Inserts are O(1) amortized; deletes by ID are O(1) using swap-delete (order not preserved). +// Concurrency: KDTree is not safe for concurrent mutation. 
Guard with a mutex or +// share immutable snapshots for read-mostly workloads. +// +// This type is designed to be easily swappable with gonum.org/v1/gonum/spatial/kdtree // in the future without breaking the public API. type KDTree[T any] struct { points []KDPoint[T] @@ -156,7 +161,7 @@ func (t *KDTree[T]) Dim() int { return t.dim } func (t *KDTree[T]) Len() int { return len(t.points) } // Nearest returns the closest point to the query, along with its distance. -// ok is false if the tree is empty. +// ok is false if the tree is empty or the query dimensionality does not match Dim(). func (t *KDTree[T]) Nearest(query []float64) (KDPoint[T], float64, bool) { if len(query) != t.dim || t.Len() == 0 { return KDPoint[T]{}, 0, false @@ -177,6 +182,7 @@ func (t *KDTree[T]) Nearest(query []float64) (KDPoint[T], float64, bool) { } // KNearest returns up to k nearest neighbors to the query in ascending distance order. +// If multiple points are at the same distance, tie ordering is arbitrary and not stable between calls. func (t *KDTree[T]) KNearest(query []float64, k int) ([]KDPoint[T], []float64) { if k <= 0 || len(query) != t.dim || t.Len() == 0 { return nil, nil