// Truncate a &str to a byte budget at a char boundary (prefix) #[inline] pub fn take_bytes_at_char_boundary(s: &str, maxb: usize) -> &str { if s.len() <= maxb { return s; } let mut last_ok = 0; for (i, ch) in s.char_indices() { let nb = i + ch.len_utf8(); if nb > maxb { break; } last_ok = nb; } &s[..last_ok] } // Take a suffix of a &str within a byte budget at a char boundary #[inline] pub fn take_last_bytes_at_char_boundary(s: &str, maxb: usize) -> &str { if s.len() <= maxb { return s; } let mut start = s.len(); let mut used = 0usize; for (i, ch) in s.char_indices().rev() { let nb = ch.len_utf8(); if used + nb > maxb { break; } start = i; used += nb; if start == 0 { break; } } &s[start..] } /// Sanitize a tag value to comply with metric tag validation rules: /// only ASCII alphanumeric, '.', '_', '-', and '/' are allowed. pub fn sanitize_metric_tag_value(value: &str) -> String { const MAX_LEN: usize = 256; let sanitized: String = value .chars() .map(|ch| { if ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/') { ch } else { '_' } }) .collect(); let trimmed = sanitized.trim_matches('_'); if trimmed.is_empty() || trimmed.chars().all(|ch| !ch.is_ascii_alphanumeric()) { return "unspecified".to_string(); } if trimmed.len() <= MAX_LEN { trimmed.to_string() } else { trimmed[..MAX_LEN].to_string() } } /// Find all UUIDs in a string. #[allow(clippy::unwrap_used)] pub fn find_uuids(s: &str) -> Vec { static RE: std::sync::OnceLock = std::sync::OnceLock::new(); let re = RE.get_or_init(|| { regex_lite::Regex::new( r"[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}", ) .unwrap() // Unwrap is safe thanks to the tests. }); re.find_iter(s).map(|m| m.as_str().to_string()).collect() } /// Convert a markdown-style `#L..` location suffix into a terminal-friendly /// `:line[:column][-line[:column]]` suffix. 
///
/// The accepted input is `#L<line>[C<column>]`, optionally followed by
/// `-L<line>[C<column>]` for a range; for example `#L74C3-L76C9` becomes
/// `:74:3-76:9`. Returns `None` when the leading `#` or an `L` marker is
/// missing, or when a line or column is not a non-empty run of ASCII digits.
pub fn normalize_markdown_hash_location_suffix(suffix: &str) -> Option<String> {
    let fragment = suffix.strip_prefix('#')?;
    let (start, end) = match fragment.split_once('-') {
        Some((start, end)) => (start, Some(end)),
        None => (fragment, None),
    };

    let mut normalized = String::with_capacity(suffix.len());
    append_markdown_hash_location_point(&mut normalized, ':', start)?;
    if let Some(end) = end {
        append_markdown_hash_location_point(&mut normalized, '-', end)?;
    }
    Some(normalized)
}

/// Append `separator` and then `line[:column]` for one `L<line>[C<column>]`
/// point to `out`. Returns `None`, leaving `out` untouched, when the point
/// does not parse.
fn append_markdown_hash_location_point(
    out: &mut String,
    separator: char,
    point: &str,
) -> Option<()> {
    let (line, column) = parse_markdown_hash_location_point(point)?;
    out.push(separator);
    out.push_str(line);
    if let Some(column) = column {
        out.push(':');
        out.push_str(column);
    }
    Some(())
}

/// Split one `L<line>[C<column>]` point into its line and optional column
/// text. Returns `None` when the `L` prefix is missing or when either number
/// is empty or contains anything other than ASCII digits.
fn parse_markdown_hash_location_point(point: &str) -> Option<(&str, Option<&str>)> {
    fn is_number(text: &str) -> bool {
        !text.is_empty() && text.bytes().all(|byte| byte.is_ascii_digit())
    }

    let point = point.strip_prefix('L')?;
    let (line, column) = match point.split_once('C') {
        Some((line, column)) => (line, Some(column)),
        None => (point, None),
    };

    // Reject inputs such as `#L`, `#Lfoo` or `#L1C`, which would otherwise
    // produce suffixes like `:` or `:foo` that are not valid locations.
    if !is_number(line) || column.is_some_and(|column| !is_number(column)) {
        return None;
    }
    Some((line, column))
}

#[cfg(test)]
mod tests {
    use super::find_uuids;
    use super::normalize_markdown_hash_location_suffix;
    use super::sanitize_metric_tag_value;
    use pretty_assertions::assert_eq;

    #[test]
    fn find_uuids_finds_multiple() {
        let input =
            "x 00112233-4455-6677-8899-aabbccddeeff-k y 12345678-90ab-cdef-0123-456789abcdef";
        assert_eq!(
            find_uuids(input),
            vec![
                "00112233-4455-6677-8899-aabbccddeeff".to_string(),
                "12345678-90ab-cdef-0123-456789abcdef".to_string(),
            ]
        );
    }

    #[test]
    fn find_uuids_ignores_invalid() {
        let input = "not-a-uuid-1234-5678-9abc-def0-123456789abc";
        assert_eq!(find_uuids(input), Vec::<String>::new());
    }

    #[test]
    fn find_uuids_handles_non_ascii_without_overlap() {
        let input = "🙂 55e5d6f7-8a7f-4d2a-8d88-123456789012abc";
        assert_eq!(
            find_uuids(input),
            vec!["55e5d6f7-8a7f-4d2a-8d88-123456789012".to_string()]
        );
    }

    #[test]
    fn sanitize_metric_tag_value_trims_and_fills_unspecified() {
        let msg = "///";
        assert_eq!(sanitize_metric_tag_value(msg), "unspecified");
    }

    #[test]
    fn sanitize_metric_tag_value_replaces_invalid_chars() {
        let msg = "bad value!";
        assert_eq!(sanitize_metric_tag_value(msg), "bad_value");
    }

    #[test]
    fn normalize_markdown_hash_location_suffix_converts_single_location() {
        assert_eq!(
            normalize_markdown_hash_location_suffix("#L74C3"),
            Some(":74:3".to_string())
        );
    }

    #[test]
    fn normalize_markdown_hash_location_suffix_converts_ranges() {
        assert_eq!(
            normalize_markdown_hash_location_suffix("#L74C3-L76C9"),
            Some(":74:3-76:9".to_string())
        );
    }

    #[test]
    fn normalize_markdown_hash_location_suffix_converts_line_only() {
        assert_eq!(
            normalize_markdown_hash_location_suffix("#L12"),
            Some(":12".to_string())
        );
        assert_eq!(
            normalize_markdown_hash_location_suffix("#L12-L15"),
            Some(":12-15".to_string())
        );
    }

    #[test]
    fn normalize_markdown_hash_location_suffix_rejects_malformed_points() {
        for suffix in ["L74", "#74", "#L", "#Lfoo", "#L1C", "#L1Cx", "#L1-", "#L1-2", "#L1-Lx"] {
            assert_eq!(normalize_markdown_hash_location_suffix(suffix), None);
        }
    }
}