/// Returns the longest prefix of `s` that fits in `maxb` bytes without
/// splitting a UTF-8 character.
///
/// When `s` already fits in the budget it is returned unchanged. Otherwise the
/// cut point is moved back from `maxb` to the nearest char boundary, so the
/// result may be shorter than `maxb` bytes (and is empty when even the first
/// character does not fit).
#[inline]
pub fn take_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if s.len() <= maxb {
        return s;
    }
    // Here `maxb < s.len()`, so it is a valid index to probe. Index 0 is always
    // a boundary, which guarantees the loop terminates (after at most 3 steps,
    // since a UTF-8 character is at most 4 bytes long).
    let mut end = maxb;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}

/// Returns the longest suffix of `s` that fits in `maxb` bytes without
/// splitting a UTF-8 character.
///
/// When `s` already fits in the budget it is returned unchanged. Otherwise the
/// cut point is moved forward from `s.len() - maxb` to the nearest char
/// boundary, so the result may be shorter than `maxb` bytes (and is empty when
/// even the last character does not fit).
#[inline]
pub fn take_last_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if s.len() <= maxb {
        return s;
    }
    // `s.len() > maxb`, so the subtraction cannot underflow. `s.len()` itself is
    // always a boundary, which guarantees the loop terminates.
    let mut start = s.len() - maxb;
    while !s.is_char_boundary(start) {
        start += 1;
    }
    &s[start..]
}

/// Sanitize a tag value to comply with metric tag validation rules:
/// only ASCII alphanumeric, '.', '_', '-', and '/' are allowed.
///
/// Every other character is replaced with `'_'`, leading and trailing
/// underscores are stripped, and the result is capped at 256 bytes. The cap is
/// applied *before* the final checks so that truncation can neither leave a
/// trailing `'_'` nor cut away every alphanumeric character.
///
/// Returns `"unspecified"` when the final value would be empty or would not
/// contain any ASCII alphanumeric character.
pub fn sanitize_metric_tag_value(value: &str) -> String {
    const MAX_LEN: usize = 256;
    const FALLBACK: &str = "unspecified";

    let sanitized: String = value
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/') {
                ch
            } else {
                '_'
            }
        })
        .collect();

    // Strip padding first so leading underscores do not eat into the budget.
    let trimmed = sanitized.trim_matches('_');
    // `sanitized` is pure ASCII, so this yields exactly MAX_LEN bytes whenever
    // `trimmed` is longer than the budget.
    let capped = take_bytes_at_char_boundary(trimmed, MAX_LEN);
    // The cut may land right after an underscore; strip it again.
    let capped = capped.trim_end_matches('_');

    // Also covers the empty string: `any` over nothing is false.
    if !capped.chars().any(|ch| ch.is_ascii_alphanumeric()) {
        return FALLBACK.to_string();
    }
    capped.to_string()
}

#[cfg(test)]
mod tests {
    use super::sanitize_metric_tag_value;
    use super::take_bytes_at_char_boundary;
    use super::take_last_bytes_at_char_boundary;

    #[test]
    fn sanitize_metric_tag_value_trims_and_fills_unspecified() {
        let msg = "///";
        assert_eq!(sanitize_metric_tag_value(msg), "unspecified");
    }

    #[test]
    fn sanitize_metric_tag_value_replaces_invalid_chars() {
        let msg = "bad value!";
        assert_eq!(sanitize_metric_tag_value(msg), "bad_value");
    }

    #[test]
    fn sanitize_metric_tag_value_trims_underscore_exposed_by_truncation() {
        let msg = format!("{}!b", "a".repeat(255));
        assert_eq!(sanitize_metric_tag_value(&msg), "a".repeat(255));
    }

    #[test]
    fn sanitize_metric_tag_value_rechecks_alphanumerics_after_truncation() {
        let msg = format!("{}a", "/".repeat(256));
        assert_eq!(sanitize_metric_tag_value(&msg), "unspecified");
    }

    #[test]
    fn take_bytes_at_char_boundary_never_splits_a_char() {
        assert_eq!(take_bytes_at_char_boundary("héllo", 2), "h");
        assert_eq!(take_bytes_at_char_boundary("héllo", 3), "hé");
    }

    #[test]
    fn take_last_bytes_at_char_boundary_never_splits_a_char() {
        assert_eq!(take_last_bytes_at_char_boundary("héllo", 4), "llo");
        assert_eq!(take_last_bytes_at_char_boundary("héllo", 5), "éllo");
    }
}