fix: Address race conditions and network blocking issues

Critical fixes:
- Release mutex before HTTP calls in GetStats() to prevent blocking
- Fix m.cmd race between Stop() and Wait() goroutine by capturing locally
- Add context support to GetStats() for proper request cancellation

High priority fixes:
- Add existence check in collectMinerStats() before operating on miners
- Add mutex-protected httpClient getter/setter for thread-safe test mocking

Changes:
- Miner interface now requires context.Context for GetStats()
- Stats HTTP requests timeout after 5 seconds (was 30s client default)
- All callers updated to pass context (service uses request context)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
snider 2025-12-31 01:55:24 +00:00
parent 473c72814f
commit 1351dc7562
15 changed files with 139 additions and 61 deletions

View file

@ -119,7 +119,7 @@ var serveCmd = &cobra.Command{
if err != nil {
fmt.Fprintf(os.Stderr, "Error getting miner status: %v\n", err)
} else {
stats, err := miner.GetStats()
stats, err := miner.GetStats(context.Background())
if err != nil {
fmt.Fprintf(os.Stderr, "Error getting miner stats: %v\n", err)
} else {

View file

@ -1,6 +1,7 @@
package cmd
import (
"context"
"fmt"
"github.com/spf13/cobra"
@ -23,7 +24,7 @@ var statusCmd = &cobra.Command{
return fmt.Errorf("failed to get miner: %w", err)
}
stats, err := miner.GetStats()
stats, err := miner.GetStats(context.Background())
if err != nil {
return fmt.Errorf("failed to get miner stats: %w", err)
}

View file

@ -1,6 +1,7 @@
package mining
import (
"context"
"testing"
"time"
)
@ -46,7 +47,7 @@ func TestDualMiningCPUAndGPU(t *testing.T) {
time.Sleep(20 * time.Second)
// Get stats
stats, err := minerInstance.GetStats()
stats, err := minerInstance.GetStats(context.Background())
if err != nil {
t.Logf("Warning: couldn't get stats: %v", err)
} else {

View file

@ -1,6 +1,7 @@
package mining
import (
"context"
"fmt"
"log"
"net"
@ -443,9 +444,19 @@ func (m *Manager) collectMinerStats() {
now := time.Now()
for _, miner := range minersToCollect {
stats, err := miner.GetStats()
minerName := miner.GetName()
// Verify miner still exists before collecting stats
m.mu.RLock()
_, stillExists := m.miners[minerName]
m.mu.RUnlock()
if !stillExists {
continue // Miner was removed, skip it
}
stats, err := miner.GetStats(context.Background())
if err != nil {
log.Printf("Error getting stats for miner %s: %v\n", miner.GetName(), err)
log.Printf("Error getting stats for miner %s: %v\n", minerName, err)
continue
}
@ -460,7 +471,6 @@ func (m *Manager) collectMinerStats() {
// Persist to database if enabled
if m.dbEnabled {
minerName := miner.GetName()
minerType := minerTypes[minerName]
dbPoint := database.HashratePoint{
Timestamp: point.Timestamp,

View file

@ -132,9 +132,9 @@ func (b *BaseMiner) GetBinaryPath() string {
// It first tries SIGTERM to allow cleanup, then SIGKILL if needed.
func (b *BaseMiner) Stop() error {
b.mu.Lock()
defer b.mu.Unlock()
if !b.Running || b.cmd == nil {
b.mu.Unlock()
return errors.New("miner is not running")
}
@ -144,25 +144,27 @@ func (b *BaseMiner) Stop() error {
b.stdinPipe = nil
}
// Helper to clean up state
cleanup := func() {
b.Running = false
b.cmd = nil
}
// Capture cmd locally to avoid race with Wait() goroutine
cmd := b.cmd
process := cmd.Process
// Mark as not running immediately to prevent concurrent Stop() calls
b.Running = false
b.cmd = nil
b.mu.Unlock()
// Try graceful shutdown with SIGTERM first (Unix only)
if runtime.GOOS != "windows" {
if err := b.cmd.Process.Signal(syscall.SIGTERM); err == nil {
if err := process.Signal(syscall.SIGTERM); err == nil {
// Wait up to 3 seconds for graceful shutdown
done := make(chan struct{})
go func() {
b.cmd.Process.Wait()
process.Wait()
close(done)
}()
select {
case <-done:
cleanup()
return nil
case <-time.After(3 * time.Second):
// Process didn't exit gracefully, force kill below
@ -171,14 +173,12 @@ func (b *BaseMiner) Stop() error {
}
// Force kill and wait for process to exit
if err := b.cmd.Process.Kill(); err != nil {
cleanup()
if err := process.Kill(); err != nil {
return err
}
// Wait for process to fully terminate to avoid zombies
b.cmd.Process.Wait()
cleanup()
process.Wait()
return nil
}
@ -214,7 +214,7 @@ func (b *BaseMiner) InstallFromURL(url string) error {
defer os.Remove(tmpfile.Name())
defer tmpfile.Close()
resp, err := httpClient.Get(url)
resp, err := getHTTPClient().Get(url)
if err != nil {
return err
}

View file

@ -1,6 +1,7 @@
package mining
import (
"context"
"time"
)
@ -17,7 +18,7 @@ type Miner interface {
Uninstall() error
Start(config *Config) error
Stop() error
GetStats() (*PerformanceMetrics, error)
GetStats(ctx context.Context) (*PerformanceMetrics, error)
GetName() string
GetPath() string
GetBinaryPath() string

View file

@ -483,7 +483,7 @@ func (s *Service) handleGetMinerStats(c *gin.Context) {
c.JSON(http.StatusNotFound, gin.H{"error": "miner not found"})
return
}
stats, err := miner.GetStats()
stats, err := miner.GetStats(c.Request.Context())
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return

View file

@ -1,6 +1,7 @@
package mining
import (
"context"
"runtime"
"testing"
"time"
@ -201,7 +202,7 @@ func TestMinerResourceIsolation(t *testing.T) {
time.Sleep(10 * time.Second)
// Get baseline hashrate for miner 1 alone
stats1Alone, err := miner1.GetStats()
stats1Alone, err := miner1.GetStats(context.Background())
if err != nil {
t.Logf("Warning: couldn't get stats for miner 1: %v", err)
}
@ -226,11 +227,11 @@ func TestMinerResourceIsolation(t *testing.T) {
time.Sleep(15 * time.Second)
// Check both miners are running and producing hashrate
stats1, err := miner1.GetStats()
stats1, err := miner1.GetStats(context.Background())
if err != nil {
t.Logf("Warning: couldn't get stats for miner 1: %v", err)
}
stats2, err := miner2.GetStats()
stats2, err := miner2.GetStats(context.Background())
if err != nil {
t.Logf("Warning: couldn't get stats for miner 2: %v", err)
}

View file

@ -85,7 +85,7 @@ func getTTMinerConfigPath() (string, error) {
// GetLatestVersion fetches the latest version of TT-Miner from the GitHub API.
func (m *TTMiner) GetLatestVersion() (string, error) {
resp, err := httpClient.Get("https://api.github.com/repos/TrailingStop/TT-Miner-release/releases/latest")
resp, err := getHTTPClient().Get("https://api.github.com/repos/TrailingStop/TT-Miner-release/releases/latest")
if err != nil {
return "", err
}

View file

@ -64,11 +64,16 @@ func (m *TTMiner) Start(config *Config) error {
m.Running = true
// Monitor the process in a goroutine
// Capture cmd locally to avoid race with Stop()
cmd := m.cmd
go func() {
err := m.cmd.Wait()
err := cmd.Wait()
m.mu.Lock()
m.Running = false
// Only clear if this is still the same command (not restarted)
if m.cmd == cmd {
m.Running = false
m.cmd = nil
}
m.mu.Unlock()
if err != nil {
log.Printf("TT-Miner exited with error: %v", err)

View file

@ -1,6 +1,7 @@
package mining
import (
"context"
"encoding/json"
"errors"
"fmt"
@ -8,19 +9,32 @@ import (
)
// GetStats retrieves performance metrics from the TT-Miner API.
func (m *TTMiner) GetStats() (*PerformanceMetrics, error) {
m.mu.Lock()
defer m.mu.Unlock()
func (m *TTMiner) GetStats(ctx context.Context) (*PerformanceMetrics, error) {
// Read state under RLock, then release before HTTP call
m.mu.RLock()
if !m.Running {
m.mu.RUnlock()
return nil, errors.New("miner is not running")
}
if m.API == nil || m.API.ListenPort == 0 {
m.mu.RUnlock()
return nil, errors.New("miner API not configured or port is zero")
}
host := m.API.ListenHost
port := m.API.ListenPort
m.mu.RUnlock()
// TT-Miner API endpoint - try the summary endpoint
resp, err := httpClient.Get(fmt.Sprintf("http://%s:%d/summary", m.API.ListenHost, m.API.ListenPort))
// Create request with context and timeout
reqCtx, cancel := context.WithTimeout(ctx, statsTimeout)
defer cancel()
req, err := http.NewRequestWithContext(reqCtx, "GET", fmt.Sprintf("http://%s:%d/summary", host, port), nil)
if err != nil {
return nil, err
}
// HTTP call outside the lock to avoid blocking other operations
resp, err := getHTTPClient().Do(req)
if err != nil {
return nil, err
}
@ -35,8 +49,10 @@ func (m *TTMiner) GetStats() (*PerformanceMetrics, error) {
return nil, err
}
// Store the full summary in the miner struct
// Store the full summary in the miner struct (requires lock)
m.mu.Lock()
m.FullStats = &summary
m.mu.Unlock()
// Calculate total hashrate from all GPUs
var totalHashrate float64

View file

@ -11,6 +11,7 @@ import (
"path/filepath"
"runtime"
"strings"
"sync"
"time"
"github.com/adrg/xdg"
@ -22,8 +23,23 @@ type XMRigMiner struct {
FullStats *XMRigSummary `json:"full_stats,omitempty"`
}
var httpClient = &http.Client{
Timeout: 30 * time.Second,
var (
httpClient = &http.Client{Timeout: 30 * time.Second}
httpClientMu sync.RWMutex
)
// getHTTPClient returns the HTTP client with proper synchronization
func getHTTPClient() *http.Client {
httpClientMu.RLock()
defer httpClientMu.RUnlock()
return httpClient
}
// setHTTPClient sets the HTTP client (for testing)
func setHTTPClient(client *http.Client) {
httpClientMu.Lock()
defer httpClientMu.Unlock()
httpClient = client
}
// NewXMRigMiner creates a new XMRig miner instance with default settings.
@ -70,7 +86,7 @@ var getXMRigConfigPath = func(instanceName string) (string, error) {
// GetLatestVersion fetches the latest version of XMRig from the GitHub API.
func (m *XMRigMiner) GetLatestVersion() (string, error) {
resp, err := httpClient.Get("https://api.github.com/repos/xmrig/xmrig/releases/latest")
resp, err := getHTTPClient().Get("https://api.github.com/repos/xmrig/xmrig/releases/latest")
if err != nil {
return "", err
}

View file

@ -87,11 +87,16 @@ func (m *XMRigMiner) Start(config *Config) error {
m.Running = true
// Capture cmd locally to avoid race with Stop()
cmd := m.cmd
go func() {
m.cmd.Wait()
cmd.Wait()
m.mu.Lock()
m.Running = false
m.cmd = nil
// Only clear if this is still the same command (not restarted)
if m.cmd == cmd {
m.Running = false
m.cmd = nil
}
m.mu.Unlock()
}()

View file

@ -1,25 +1,44 @@
package mining
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"time"
)
// GetStats retrieves the performance statistics from the running XMRig miner.
func (m *XMRigMiner) GetStats() (*PerformanceMetrics, error) {
m.mu.Lock()
defer m.mu.Unlock()
// statsTimeout is the timeout for stats HTTP requests (shorter than general timeout)
const statsTimeout = 5 * time.Second
// GetStats retrieves the performance statistics from the running XMRig miner.
func (m *XMRigMiner) GetStats(ctx context.Context) (*PerformanceMetrics, error) {
// Read state under RLock, then release before HTTP call
m.mu.RLock()
if !m.Running {
m.mu.RUnlock()
return nil, errors.New("miner is not running")
}
if m.API == nil || m.API.ListenPort == 0 {
m.mu.RUnlock()
return nil, errors.New("miner API not configured or port is zero")
}
host := m.API.ListenHost
port := m.API.ListenPort
m.mu.RUnlock()
resp, err := httpClient.Get(fmt.Sprintf("http://%s:%d/2/summary", m.API.ListenHost, m.API.ListenPort))
// Create request with context and timeout
reqCtx, cancel := context.WithTimeout(ctx, statsTimeout)
defer cancel()
req, err := http.NewRequestWithContext(reqCtx, "GET", fmt.Sprintf("http://%s:%d/2/summary", host, port), nil)
if err != nil {
return nil, err
}
// HTTP call outside the lock to avoid blocking other operations
resp, err := getHTTPClient().Do(req)
if err != nil {
return nil, err
}
@ -34,8 +53,10 @@ func (m *XMRigMiner) GetStats() (*PerformanceMetrics, error) {
return nil, err
}
// Store the full summary in the miner struct
// Store the full summary in the miner struct (requires lock)
m.mu.Lock()
m.FullStats = &summary
m.mu.Unlock()
var hashrate int
if len(summary.Hashrate.Total) > 0 {

View file

@ -1,6 +1,7 @@
package mining
import (
"context"
"encoding/json"
"fmt"
"io"
@ -63,8 +64,8 @@ func TestXMRigMiner_GetName_Good(t *testing.T) {
}
func TestXMRigMiner_GetLatestVersion_Good(t *testing.T) {
originalClient := httpClient
httpClient = newTestClient(func(req *http.Request) *http.Response {
originalClient := getHTTPClient()
setHTTPClient(newTestClient(func(req *http.Request) *http.Response {
if req.URL.String() != "https://api.github.com/repos/xmrig/xmrig/releases/latest" {
return &http.Response{
StatusCode: http.StatusNotFound,
@ -77,8 +78,8 @@ func TestXMRigMiner_GetLatestVersion_Good(t *testing.T) {
Body: io.NopCloser(strings.NewReader(`{"tag_name": "v6.18.0"}`)),
Header: make(http.Header),
}
})
defer func() { httpClient = originalClient }()
}))
defer setHTTPClient(originalClient)
miner := NewXMRigMiner()
version, err := miner.GetLatestVersion()
@ -91,15 +92,15 @@ func TestXMRigMiner_GetLatestVersion_Good(t *testing.T) {
}
func TestXMRigMiner_GetLatestVersion_Bad(t *testing.T) {
originalClient := httpClient
httpClient = newTestClient(func(req *http.Request) *http.Response {
originalClient := getHTTPClient()
setHTTPClient(newTestClient(func(req *http.Request) *http.Response {
return &http.Response{
StatusCode: http.StatusNotFound,
Body: io.NopCloser(strings.NewReader("Not Found")),
Header: make(http.Header),
}
})
defer func() { httpClient = originalClient }()
}))
defer setHTTPClient(originalClient)
miner := NewXMRigMiner()
_, err := miner.GetLatestVersion()
@ -197,9 +198,9 @@ func TestXMRigMiner_GetStats_Good(t *testing.T) {
}))
defer server.Close()
originalHTTPClient := httpClient
httpClient = server.Client()
defer func() { httpClient = originalHTTPClient }()
originalHTTPClient := getHTTPClient()
setHTTPClient(server.Client())
defer setHTTPClient(originalHTTPClient)
miner := NewXMRigMiner()
miner.Running = true // Mock running state
@ -209,7 +210,7 @@ func TestXMRigMiner_GetStats_Good(t *testing.T) {
miner.API.ListenHost = parts[0]
fmt.Sscanf(parts[1], "%d", &miner.API.ListenPort)
stats, err := miner.GetStats()
stats, err := miner.GetStats(context.Background())
if err != nil {
t.Fatalf("GetStats() returned an error: %v", err)
}
@ -237,7 +238,7 @@ func TestXMRigMiner_GetStats_Bad(t *testing.T) {
miner.API.ListenHost = "127.0.0.1"
miner.API.ListenPort = 9999 // A port that is unlikely to be in use
_, err := miner.GetStats()
_, err := miner.GetStats(context.Background())
if err == nil {
t.Fatalf("GetStats() did not return an error")
}