Implements inference.Backend via a Python subprocess communicating over JSON Lines (stdin/stdout). No CGO required — pure Go + os/exec. - bridge.py: embedded Python script wrapping mlx_lm.load() and mlx_lm.stream_generate() with load/generate/chat/info/cancel/quit commands. Flushes stdout after every JSON line for streaming. - backend.go: Go subprocess manager. Extracts bridge.py from go:embed to temp file, spawns python3, pipes JSON requests. mlxlmModel implements full TextModel interface with mutex- serialised Generate/Chat, context cancellation with drain, and 2-second graceful Close with kill fallback. Auto-registers as "mlx_lm" via init(). Build tag: !nomlxlm. - backend_test.go: 15 tests using mock_bridge.py (no mlx_lm needed): name, load, generate, cancel, chat, close, error propagation, invalid path, auto-register, concurrent serialisation, classify/ batch unsupported, info, metrics, max_tokens limiting. All tests pass with -race. go vet clean. Co-Authored-By: Virgil <virgil@lethean.io> Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
118 lines
3.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
mock_bridge.py — Mock bridge for testing the mlxlm Go backend.
|
|
|
|
Implements the same JSON Lines protocol as bridge.py but without mlx_lm.
|
|
Returns deterministic fake responses for testing.
|
|
|
|
SPDX-License-Identifier: EUPL-1.2
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
import os
|
|
|
|
# Module-level session state shared by the command handlers in main().
# _loaded flips to True after a successful "load" and gates generate/chat/info.
_loaded = False
# Path supplied by the most recent successful "load" command.
_model_path = ""
|
|
|
|
|
|
def _write(obj):
    """Emit *obj* to the client as one JSON line and flush immediately.

    The per-line flush matters: the Go side streams tokens by reading
    stdout line-by-line, so output must never sit in a buffer.
    """
    payload = json.dumps(obj)
    stream = sys.stdout
    stream.write(payload + "\n")
    stream.flush()
|
|
|
|
|
|
def _error(msg):
    """Report a failure to the client as a JSON Lines ``{"error": ...}`` object."""
    payload = {"error": str(msg)}
    _write(payload)
|
|
|
|
|
|
def main():
    """Run the mock bridge loop: read JSON requests from stdin, answer on stdout.

    One JSON object per input line. Supported commands (``req["cmd"]``):

    * ``load {path}`` — marks the mock as loaded; paths containing ``FAIL``
      simulate a load failure. Replies ``{ok, model_type, vocab_size}``.
    * ``generate {prompt, max_tokens}`` — streams fixed tokens one JSON line
      each, then ``{done, tokens_generated}``. Prompts containing ``ERROR``
      simulate a model error.
    * ``chat {messages, max_tokens}`` — same streaming shape with a different
      fixed token list (the mock ignores the message history).
    * ``info`` — static model metadata; requires a prior ``load``.
    * ``cancel`` — no-op in the mock (the real bridge sets a cancel flag).
    * ``quit`` — terminates the loop.

    Malformed JSON and unknown commands produce an ``{"error": ...}`` reply
    and the loop continues; only ``quit`` (or EOF on stdin) ends it.
    """
    global _loaded, _model_path

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        try:
            req = json.loads(line)
        except json.JSONDecodeError as e:
            _error(f"parse error: {e}")
            continue

        cmd = req.get("cmd", "")

        if cmd == "quit":
            break

        elif cmd == "load":
            path = req.get("path", "")
            if not path:
                _error("load: missing 'path'")
                continue
            # Simulate failure for paths containing "FAIL".
            if "FAIL" in path:
                _error(f"load: cannot open model at {path}")
                continue
            _loaded = True
            _model_path = path
            _write({
                "ok": True,
                "model_type": "mock_model",
                "vocab_size": 32000,
            })

        elif cmd == "generate":
            if not _loaded:
                _error("generate: no model loaded")
                continue

            # Check for the error trigger before emitting any tokens so a
            # simulated failure never produces a partial stream.
            prompt = req.get("prompt", "")
            if "ERROR" in prompt:
                _error("generate: simulated model error")
                continue

            # Emit a fixed token stream, truncated to max_tokens. Clamp at
            # zero so a negative max_tokens cannot yield a negative
            # tokens_generated count in the done message.
            max_tokens = req.get("max_tokens", 5)
            tokens = ["Hello", " ", "world", "!", "\n"]
            count = max(0, min(max_tokens, len(tokens)))
            for i in range(count):
                _write({"token": tokens[i], "token_id": 100 + i})
            _write({"done": True, "tokens_generated": count})

        elif cmd == "chat":
            if not _loaded:
                _error("chat: no model loaded")
                continue

            # NOTE: the mock deliberately ignores req["messages"] — the
            # reply is fixed regardless of conversation content, so the Go
            # tests get deterministic output.
            max_tokens = req.get("max_tokens", 5)
            tokens = ["I", " ", "heard", " ", "you"]
            count = max(0, min(max_tokens, len(tokens)))
            for i in range(count):
                _write({"token": tokens[i], "token_id": 200 + i})
            _write({"done": True, "tokens_generated": count})

        elif cmd == "info":
            if not _loaded:
                _error("info: no model loaded")
                continue
            _write({
                "model_type": "mock_model",
                "vocab_size": 32000,
                "layers": 24,
                "hidden_size": 2048,
            })

        elif cmd == "cancel":
            # No-op in mock — real bridge sets a flag.
            pass

        else:
            _error(f"unknown command: {cmd}")
|
|
|
|
|
|
# Script entry point: run the request loop only when executed directly,
# not when imported (e.g. by tests).
if __name__ == "__main__":
    main()
|