chore: refactor tool handling (#4510)
# Tool System Refactor
- Centralizes tool definitions and execution in `core/src/tools/*`:
specs (`spec.rs`), handlers (`handlers/*`), router (`router.rs`),
registry/dispatch (`registry.rs`), and shared context (`context.rs`).
One registry now builds the model-visible tool list and binds handlers.
- Router converts model responses to tool calls; Registry dispatches
with consistent telemetry via `codex-rs/otel` and unified error
handling. Function, Local Shell, MCP, and experimental `unified_exec`
all flow through this path; legacy shell aliases still work.
- Rationale: reduce per‑tool boilerplate, keep spec/handler in sync, and
make adding tools predictable and testable.
Example: `read_file`
- Spec: `core/src/tools/spec.rs` (see `create_read_file_tool`,
registered by `build_specs`).
- Handler: `core/src/tools/handlers/read_file.rs` (absolute `file_path`,
1‑indexed `offset`, `limit`, `L#: ` prefixes, safe truncation).
- E2E test: `core/tests/suite/read_file.rs` validates the tool returns
the requested lines.
## Next steps:
- Decompose `handle_container_exec_with_params`
- Add parallel tool calls
2025-10-03 13:21:06 +01:00
|
|
|
/// Returns the longest prefix of `s` that fits in `maxb` bytes and ends on a
/// UTF-8 character boundary.
///
/// The input is returned unchanged when it already fits in the budget. A
/// character that would straddle the budget is dropped entirely, so the
/// result can be shorter than `maxb` bytes (and is empty when not even the
/// first character fits).
#[inline]
pub fn take_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if s.len() <= maxb {
        return s;
    }
    // Character end offsets increase monotonically, so the last one that is
    // still within the budget is the cut point; none fitting means cut at 0.
    let cut = s
        .char_indices()
        .map(|(offset, ch)| offset + ch.len_utf8())
        .take_while(|&end| end <= maxb)
        .last()
        .unwrap_or(0);
    &s[..cut]
}
|
|
|
|
|
|
|
|
|
|
/// Returns the longest suffix of `s` that fits in `maxb` bytes and starts on
/// a UTF-8 character boundary.
///
/// The input is returned unchanged when it already fits in the budget. A
/// character that would straddle the budget is dropped entirely, so the
/// result can be shorter than `maxb` bytes (and is empty when not even the
/// last character fits).
#[inline]
pub fn take_last_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if s.len() <= maxb {
        return s;
    }
    let total = s.len();
    // Walking backwards, `total - offset` is the byte length of the suffix
    // starting at `offset`; keep extending while that suffix fits.
    let cut = s
        .char_indices()
        .rev()
        .map(|(offset, _)| offset)
        .take_while(|&offset| total - offset <= maxb)
        .last()
        .unwrap_or(total);
    &s[cut..]
}
|
2026-02-05 06:30:31 -08:00
|
|
|
|
|
|
|
|
/// Sanitize a tag value to comply with metric tag validation rules:
/// only ASCII alphanumeric, '.', '_', '-', and '/' are allowed.
///
/// Every disallowed character is replaced with `_`, then leading and trailing
/// underscores are stripped. If nothing alphanumeric remains, the placeholder
/// `"unspecified"` is returned. Otherwise the value is capped at 256 bytes.
pub fn sanitize_metric_tag_value(value: &str) -> String {
    const MAX_LEN: usize = 256;

    let is_allowed = |ch: char| ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/');
    let replaced: String = value
        .chars()
        .map(|ch| if is_allowed(ch) { ch } else { '_' })
        .collect();

    let core = replaced.trim_matches('_');
    // Also covers the empty case: an empty string has no alphanumerics.
    if !core.chars().any(|ch| ch.is_ascii_alphanumeric()) {
        return "unspecified".to_string();
    }

    // After replacement every character is ASCII, so any byte offset is a
    // valid character boundary for slicing.
    let keep = core.len().min(MAX_LEN);
    core[..keep].to_string()
}
|
|
|
|
|
|
2026-02-23 14:14:36 +00:00
|
|
|
/// Find all UUIDs in a string.
|
|
|
|
|
#[allow(clippy::unwrap_used)]
|
|
|
|
|
pub fn find_uuids(s: &str) -> Vec<String> {
|
|
|
|
|
static RE: std::sync::OnceLock<regex_lite::Regex> = std::sync::OnceLock::new();
|
|
|
|
|
let re = RE.get_or_init(|| {
|
|
|
|
|
regex_lite::Regex::new(
|
|
|
|
|
r"[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}",
|
|
|
|
|
)
|
|
|
|
|
.unwrap() // Unwrap is safe thanks to the tests.
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
re.find_iter(s).map(|m| m.as_str().to_string()).collect()
|
|
|
|
|
}
|
|
|
|
|
|
2026-02-26 10:29:54 +00:00
|
|
|
/// Convert a markdown-style `#L..` location suffix into a terminal-friendly
|
|
|
|
|
/// `:line[:column][-line[:column]]` suffix.
|
|
|
|
|
pub fn normalize_markdown_hash_location_suffix(suffix: &str) -> Option<String> {
|
|
|
|
|
let fragment = suffix.strip_prefix('#')?;
|
|
|
|
|
let (start, end) = match fragment.split_once('-') {
|
|
|
|
|
Some((start, end)) => (start, Some(end)),
|
|
|
|
|
None => (fragment, None),
|
|
|
|
|
};
|
|
|
|
|
let (start_line, start_column) = parse_markdown_hash_location_point(start)?;
|
|
|
|
|
let mut normalized = String::from(":");
|
|
|
|
|
normalized.push_str(start_line);
|
|
|
|
|
if let Some(column) = start_column {
|
|
|
|
|
normalized.push(':');
|
|
|
|
|
normalized.push_str(column);
|
|
|
|
|
}
|
|
|
|
|
if let Some(end) = end {
|
|
|
|
|
let (end_line, end_column) = parse_markdown_hash_location_point(end)?;
|
|
|
|
|
normalized.push('-');
|
|
|
|
|
normalized.push_str(end_line);
|
|
|
|
|
if let Some(column) = end_column {
|
|
|
|
|
normalized.push(':');
|
|
|
|
|
normalized.push_str(column);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Some(normalized)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Split a single `L<line>[C<column>]` point into its line and optional
/// column text.
///
/// Returns `None` when `point` does not start with `L`. The text after `L`
/// is split at the first `C`; the pieces are returned as-is, without checking
/// that they are numeric.
fn parse_markdown_hash_location_point(point: &str) -> Option<(&str, Option<&str>)> {
    let rest = point.strip_prefix('L')?;
    let parsed = rest
        .split_once('C')
        .map_or((rest, None), |(line, column)| (line, Some(column)));
    Some(parsed)
}
|
|
|
|
|
|
2026-02-05 06:30:31 -08:00
|
|
|
#[cfg(test)]
mod tests {
    use super::find_uuids;
    use super::normalize_markdown_hash_location_suffix;
    use super::sanitize_metric_tag_value;
    use pretty_assertions::assert_eq;

    /// Two UUIDs embedded in other text are both returned, in order.
    #[test]
    fn find_uuids_finds_multiple() {
        let haystack =
            "x 00112233-4455-6677-8899-aabbccddeeff-k y 12345678-90ab-cdef-0123-456789abcdef";
        let expected = vec![
            "00112233-4455-6677-8899-aabbccddeeff".to_string(),
            "12345678-90ab-cdef-0123-456789abcdef".to_string(),
        ];
        assert_eq!(find_uuids(haystack), expected);
    }

    /// Text with UUID-like dashes but the wrong group shapes yields nothing.
    #[test]
    fn find_uuids_ignores_invalid() {
        let found = find_uuids("not-a-uuid-1234-5678-9abc-def0-123456789abc");
        assert_eq!(found, Vec::<String>::new());
    }

    /// A multi-byte character before the UUID and trailing hex after it do
    /// not disturb the single match.
    #[test]
    fn find_uuids_handles_non_ascii_without_overlap() {
        let found = find_uuids("🙂 55e5d6f7-8a7f-4d2a-8d88-123456789012abc");
        assert_eq!(
            found,
            vec!["55e5d6f7-8a7f-4d2a-8d88-123456789012".to_string()]
        );
    }

    /// A value with no alphanumeric characters falls back to the placeholder.
    #[test]
    fn sanitize_metric_tag_value_trims_and_fills_unspecified() {
        assert_eq!(sanitize_metric_tag_value("///"), "unspecified");
    }

    /// Disallowed characters become underscores and edge underscores are cut.
    #[test]
    fn sanitize_metric_tag_value_replaces_invalid_chars() {
        assert_eq!(sanitize_metric_tag_value("bad value!"), "bad_value");
    }

    /// A single `#L..C..` point becomes `:line:column`.
    #[test]
    fn normalize_markdown_hash_location_suffix_converts_single_location() {
        let normalized = normalize_markdown_hash_location_suffix("#L74C3");
        assert_eq!(normalized, Some(":74:3".to_string()));
    }

    /// A `start-end` range keeps both points, joined by `-`.
    #[test]
    fn normalize_markdown_hash_location_suffix_converts_ranges() {
        let normalized = normalize_markdown_hash_location_suffix("#L74C3-L76C9");
        assert_eq!(normalized, Some(":74:3-76:9".to_string()));
    }
}
|