core-agent-ide/codex-rs/utils/string/src/lib.rs
pash-openai 6acede5a28
tui: restore visible line numbers for hidden file links (#12870)
We recently changed file linking so the model uses markdown links when
it wants something to be clickable.

This works well across the GUI surfaces because they can render markdown
cleanly and use the full absolute path in the anchor target.

A previous pass hid the absolute path in the TUI (and only showed the
label), but that also meant we could lose useful location info when the
model put the line number or range in the anchor target instead of the
label.

This follow-up keeps the TUI behavior simple while making local file
links feel closer to the old TUI file reference style.

key changes:
- Local markdown file links in the TUI keep the old file-ref feel: code
styling, no underline, no visible absolute path.
- If the hidden local anchor target includes a location suffix and the
label does not already include one, we append that suffix to the visible
label.
- This works for single lines, line/column references, and ranges.
- If the label already includes the location, we leave it alone.
- Normal web links keep the old TUI markdown-link behavior.

some examples:
- `[foo.rs](/abs/path/foo.rs)` renders as `foo.rs`
- `[foo.rs](/abs/path/foo.rs:45)` renders as `foo.rs:45`
- `[foo.rs](/abs/path/foo.rs:45:3-48:9)` renders as `foo.rs:45:3-48:9`
- `[foo.rs:45](/abs/path/foo.rs:45)` stays `foo.rs:45`
- `[docs](https://example.com/docs)` still renders like a normal web
link

how it looks:
<img width="732" height="813" alt="Screenshot 2026-02-26 at 9 27 55 AM"
src="https://github.com/user-attachments/assets/d51bf236-653a-4e83-96e4-9427f0804471"
/>
2026-02-26 10:29:54 +00:00

176 lines
5.2 KiB
Rust

/// Returns the longest prefix of `s` that fits in `maxb` bytes without
/// splitting a UTF-8 character.
///
/// If `s` already fits in the budget it is returned unchanged. Otherwise the
/// cut point is moved back from `maxb` to the nearest character boundary, so
/// the result may be shorter than `maxb` bytes and is empty when even the
/// first character does not fit.
#[inline]
pub fn take_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if maxb >= s.len() {
        return s;
    }
    // Here `maxb < s.len()`, and offset 0 is always a boundary, so the search
    // always finds something; `unwrap_or(0)` only keeps the code panic-free.
    let cut = (0..=maxb)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
/// Returns the longest suffix of `s` that fits in `maxb` bytes without
/// splitting a UTF-8 character.
///
/// If `s` already fits in the budget it is returned unchanged. Otherwise the
/// start of the suffix is moved forward from `s.len() - maxb` to the nearest
/// character boundary, so the result may be shorter than `maxb` bytes and is
/// empty when even the last character does not fit.
#[inline]
pub fn take_last_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    let total = s.len();
    if maxb >= total {
        return s;
    }
    // `total` itself is always a boundary, so the search always succeeds;
    // `unwrap_or(total)` only keeps the code panic-free.
    let begin = (total - maxb..=total)
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(total);
    &s[begin..]
}
/// Rewrites `value` so it satisfies the metric tag validation rules.
///
/// Only ASCII alphanumerics and `.`, `_`, `-`, `/` are kept; every other
/// character becomes `_`. Leading and trailing underscores are then removed.
/// If nothing alphanumeric is left the placeholder `"unspecified"` is
/// returned, and results longer than 256 bytes are cut to 256.
pub fn sanitize_metric_tag_value(value: &str) -> String {
    const MAX_LEN: usize = 256;

    let mut replaced = String::with_capacity(value.len());
    for ch in value.chars() {
        let allowed = ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/');
        replaced.push(if allowed { ch } else { '_' });
    }

    let core = replaced.trim_matches('_');
    // An empty string trivially has no alphanumerics, so one check covers both
    // the "empty" and the "punctuation only" cases.
    if !core.chars().any(|ch| ch.is_ascii_alphanumeric()) {
        return "unspecified".to_string();
    }

    // Every surviving character is ASCII, so a byte index is always a valid
    // character boundary here.
    let keep = core.len().min(MAX_LEN);
    core[..keep].to_string()
}
/// Collects every UUID-shaped substring of `s`, in order of appearance.
///
/// A match is five hyphen-separated groups of 8-4-4-4-12 hexadecimal digits
/// (either case). The pattern is not anchored to word boundaries, so a UUID
/// embedded in a longer token is still reported.
#[allow(clippy::unwrap_used)]
pub fn find_uuids(s: &str) -> Vec<String> {
    static UUID_PATTERN: std::sync::OnceLock<regex_lite::Regex> = std::sync::OnceLock::new();
    // Compile once on first use and reuse the compiled matcher afterwards.
    let matcher = UUID_PATTERN.get_or_init(|| {
        let compiled = regex_lite::Regex::new(
            r"[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}",
        );
        // The pattern is a fixed literal exercised by this file's unit tests,
        // so a compile failure would be caught there rather than at runtime.
        compiled.unwrap()
    });

    let mut uuids = Vec::new();
    for hit in matcher.find_iter(s) {
        uuids.push(hit.as_str().to_owned());
    }
    uuids
}
/// Convert a markdown-style `#L..` location suffix into a terminal-friendly
/// `:line[:column][-line[:column]]` suffix.
///
/// Accepted inputs are `#L<line>`, `#L<line>C<column>`, and a range made of
/// two such points joined by `-` (for example `#L74C3-L76C9`, which becomes
/// `:74:3-76:9`).
///
/// Returns `None` when `suffix` does not start with `#`, when a point does
/// not start with `L`, or when a line or column component is not a non-empty
/// run of ASCII digits.
pub fn normalize_markdown_hash_location_suffix(suffix: &str) -> Option<String> {
    let fragment = suffix.strip_prefix('#')?;
    let (start, end) = match fragment.split_once('-') {
        Some((start, end)) => (start, Some(end)),
        None => (fragment, None),
    };

    let (start_line, start_column) = parse_markdown_hash_location_point(start)?;
    // The output is never longer than the fragment plus the leading ':'.
    let mut normalized = String::with_capacity(fragment.len() + 1);
    normalized.push(':');
    normalized.push_str(start_line);
    if let Some(column) = start_column {
        normalized.push(':');
        normalized.push_str(column);
    }

    if let Some(end) = end {
        let (end_line, end_column) = parse_markdown_hash_location_point(end)?;
        normalized.push('-');
        normalized.push_str(end_line);
        if let Some(column) = end_column {
            normalized.push(':');
            normalized.push_str(column);
        }
    }

    Some(normalized)
}

/// Split one `L<line>[C<column>]` point into its line and optional column.
///
/// Returns `None` unless the point starts with `L` and every component is a
/// non-empty run of ASCII digits, so malformed fragments such as `L`, `Labc`
/// or `L12C` are rejected instead of being echoed into the output.
fn parse_markdown_hash_location_point(point: &str) -> Option<(&str, Option<&str>)> {
    fn is_decimal(text: &str) -> bool {
        !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit())
    }

    let point = point.strip_prefix('L')?;
    let (line, column) = match point.split_once('C') {
        Some((line, column)) => (line, Some(column)),
        None => (point, None),
    };

    if !is_decimal(line) || column.is_some_and(|c| !is_decimal(c)) {
        return None;
    }
    Some((line, column))
}
// Unit tests for the UUID finder, the metric tag sanitizer, and the markdown
// hash-location normalizer defined in this file.
#[cfg(test)]
mod tests {
use super::find_uuids;
use super::normalize_markdown_hash_location_suffix;
use super::sanitize_metric_tag_value;
use pretty_assertions::assert_eq;
// Two UUIDs in one string are both returned, in order; the trailing "-k"
// after the first one is not part of the match.
#[test]
fn find_uuids_finds_multiple() {
let input =
"x 00112233-4455-6677-8899-aabbccddeeff-k y 12345678-90ab-cdef-0123-456789abcdef";
assert_eq!(
find_uuids(input),
vec![
"00112233-4455-6677-8899-aabbccddeeff".to_string(),
"12345678-90ab-cdef-0123-456789abcdef".to_string(),
]
);
}
// A hyphenated string whose groups do not have the 8-4-4-4-12 hex shape
// yields no matches.
#[test]
fn find_uuids_ignores_invalid() {
let input = "not-a-uuid-1234-5678-9abc-def0-123456789abc";
assert_eq!(find_uuids(input), Vec::<String>::new());
}
// Multi-byte text before the UUID does not disturb matching, and the extra
// "abc" after the 12-digit final group is left out of the result.
#[test]
fn find_uuids_handles_non_ascii_without_overlap() {
let input = "🙂 55e5d6f7-8a7f-4d2a-8d88-123456789012abc";
assert_eq!(
find_uuids(input),
vec!["55e5d6f7-8a7f-4d2a-8d88-123456789012".to_string()]
);
}
// Input with no alphanumeric characters collapses to the "unspecified"
// placeholder.
#[test]
fn sanitize_metric_tag_value_trims_and_fills_unspecified() {
let msg = "///";
assert_eq!(sanitize_metric_tag_value(msg), "unspecified");
}
// The space becomes '_' and the trailing '!' becomes '_' and is then trimmed.
#[test]
fn sanitize_metric_tag_value_replaces_invalid_chars() {
let msg = "bad value!";
assert_eq!(sanitize_metric_tag_value(msg), "bad_value");
}
// A single `#L<line>C<column>` point becomes `:<line>:<column>`.
#[test]
fn normalize_markdown_hash_location_suffix_converts_single_location() {
assert_eq!(
normalize_markdown_hash_location_suffix("#L74C3"),
Some(":74:3".to_string())
);
}
// A `-`-separated pair of points becomes a `start-end` range.
#[test]
fn normalize_markdown_hash_location_suffix_converts_ranges() {
assert_eq!(
normalize_markdown_hash_location_suffix("#L74C3-L76C9"),
Some(":74:3-76:9".to_string())
);
}
}