core-agent-ide/codex-rs/network-proxy/src/policy.rs
viyatb-oai 77222492f9
feat: introducing a network sandbox proxy (#8442)
This adds a new crate, `codex-network-proxy`, a local network proxy
service used by Codex to enforce fine-grained network policy (domain
allow/deny) and to surface blocked network events for interactive
approvals.

- New crate: `codex-rs/network-proxy/` (`codex-network-proxy` binary +
library)
- Core capabilities:
  - HTTP proxy support (including CONNECT tunneling)
  - SOCKS5 proxy support (in a later PR)
  - policy evaluation (allowed/denied domain lists; denylist wins;
    wildcard support)
  - small admin API for polling/reload/mode changes
  - optional MITM support for HTTPS CONNECT to enforce “limited mode”
    method restrictions (later PR)

Will follow up integration with codex in subsequent PRs.

## Testing

- `cd codex-rs && cargo build -p codex-network-proxy`
- `cd codex-rs && cargo run -p codex-network-proxy -- proxy`
2026-01-23 17:47:09 -08:00

435 lines
15 KiB
Rust

#[cfg(test)]
use crate::config::NetworkMode;
use anyhow::Context;
use anyhow::Result;
use anyhow::ensure;
use globset::GlobBuilder;
use globset::GlobSet;
use globset::GlobSetBuilder;
use std::collections::HashSet;
use std::net::IpAddr;
use std::net::Ipv4Addr;
use std::net::Ipv6Addr;
use url::Host as UrlHost;
/// Host name or IP literal in the normalized form used for policy evaluation.
///
/// The wrapped string is the output of [`normalize_host`]; [`Host::parse`] refuses to
/// build a value whose normalized text is empty.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Host(String);

impl Host {
    /// Normalizes `input` with [`normalize_host`] and wraps the result.
    ///
    /// # Errors
    ///
    /// Fails with the message `host is empty` when nothing is left after normalization.
    pub fn parse(input: &str) -> Result<Self> {
        let host = Self(normalize_host(input));
        ensure!(!host.0.is_empty(), "host is empty");
        Ok(host)
    }

    /// Borrows the normalized host text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
/// Reports whether `host` is the name `localhost` or a loopback IP literal.
///
/// Everything from the first `%` onward (for example an IPv6 zone identifier) is ignored
/// before the check.
pub fn is_loopback_host(host: &Host) -> bool {
    let text = host.as_str();
    let bare = match text.split_once('%') {
        Some((address, _zone)) => address,
        None => text,
    };
    bare == "localhost" || bare.parse::<IpAddr>().is_ok_and(|ip| ip.is_loopback())
}
/// Reports whether `ip` belongs to a range this module classifies as non-public.
///
/// Dispatches on the address family to the IPv4 or IPv6 classifier.
pub fn is_non_public_ip(ip: IpAddr) -> bool {
    match ip {
        IpAddr::V4(v4) => is_non_public_ipv4(v4),
        IpAddr::V6(v6) => is_non_public_ipv6(v6),
    }
}
fn is_non_public_ipv4(ip: Ipv4Addr) -> bool {
// Use the standard library classification helpers where possible; they encode the intent more
// clearly than hand-rolled range checks. Some non-public ranges (e.g., CGNAT and TEST-NET
// blocks) are not covered by stable stdlib helpers yet, so we fall back to CIDR checks.
ip.is_loopback()
|| ip.is_private()
|| ip.is_link_local()
|| ip.is_unspecified()
|| ip.is_multicast()
|| ip.is_broadcast()
|| ipv4_in_cidr(ip, [0, 0, 0, 0], 8) // "this network" (RFC 1122)
|| ipv4_in_cidr(ip, [100, 64, 0, 0], 10) // CGNAT (RFC 6598)
|| ipv4_in_cidr(ip, [192, 0, 0, 0], 24) // IETF Protocol Assignments (RFC 6890)
|| ipv4_in_cidr(ip, [192, 0, 2, 0], 24) // TEST-NET-1 (RFC 5737)
|| ipv4_in_cidr(ip, [198, 18, 0, 0], 15) // Benchmarking (RFC 2544)
|| ipv4_in_cidr(ip, [198, 51, 100, 0], 24) // TEST-NET-2 (RFC 5737)
|| ipv4_in_cidr(ip, [203, 0, 113, 0], 24) // TEST-NET-3 (RFC 5737)
|| ipv4_in_cidr(ip, [240, 0, 0, 0], 4) // Reserved (RFC 6890)
}
/// Reports whether `ip` lies inside the IPv4 network `base`/`prefix`.
///
/// `base` is the network address as four octets and `prefix` is the number of leading
/// bits that must match. A prefix of `0` matches every address. Prefix lengths above 32
/// are clamped to 32 (exact match) instead of underflowing the shift computation.
fn ipv4_in_cidr(ip: Ipv4Addr, base: [u8; 4], prefix: u8) -> bool {
    // Number of trailing host bits to clear from the mask; always in 0..=32.
    let host_bits = 32 - u32::from(prefix.min(32));
    // Shifting a u32 by 32 is not representable, so `checked_shl` yields `None` for a
    // zero-length prefix and the mask becomes 0 (match everything).
    let mask = u32::MAX.checked_shl(host_bits).unwrap_or(0);
    (u32::from(ip) & mask) == (u32::from(Ipv4Addr::from(base)) & mask)
}
/// Reports whether an IPv6 address should be treated as non-public.
///
/// Addresses that `Ipv6Addr::to_ipv4` can convert are judged by the IPv4 rules (with the
/// IPv6 loopback still counted as non-public); all others are judged by the IPv6 checks.
fn is_non_public_ipv6(ip: Ipv6Addr) -> bool {
    match ip.to_ipv4() {
        Some(embedded) => ip.is_loopback() || is_non_public_ipv4(embedded),
        // For SSRF prevention, anything that is not globally routable counts as "local":
        // - `::1` loopback
        // - `::` unspecified
        // - multicast ranges
        // - `fc00::/7` unique-local (RFC 4193)
        // - `fe80::/10` link-local
        None => {
            ip.is_loopback()
                || ip.is_unspecified()
                || ip.is_multicast()
                || ip.is_unique_local()
                || ip.is_unicast_link_local()
        }
    }
}
/// Normalize host fragments for policy matching (trim whitespace, strip ports/brackets, lowercase).
pub fn normalize_host(host: &str) -> String {
let host = host.trim();
if host.starts_with('[')
&& let Some(end) = host.find(']')
{
return normalize_dns_host(&host[1..end]);
}
// The proxy stack should typically hand us a host without a port, but be
// defensive and strip `:port` when there is exactly one `:`.
if host.bytes().filter(|b| *b == b':').count() == 1 {
let host = host.split(':').next().unwrap_or_default();
return normalize_dns_host(host);
}
// Avoid mangling unbracketed IPv6 literals, but strip trailing dots so fully qualified domain
// names are treated the same as their dotless variants.
normalize_dns_host(host)
}
/// Lowercases ASCII letters and removes every trailing `.` from `host`.
fn normalize_dns_host(host: &str) -> String {
    // Trimming first is equivalent (lowercasing never adds or removes dots) and lets the
    // single allocation happen on the already-shortened slice.
    host.trim_end_matches('.').to_ascii_lowercase()
}
fn normalize_pattern(pattern: &str) -> String {
let pattern = pattern.trim();
if pattern == "*" {
return "*".to_string();
}
let (prefix, remainder) = if let Some(domain) = pattern.strip_prefix("**.") {
("**.", domain)
} else if let Some(domain) = pattern.strip_prefix("*.") {
("*.", domain)
} else {
("", pattern)
};
let remainder = normalize_host(remainder);
if prefix.is_empty() {
remainder
} else {
format!("{prefix}{remainder}")
}
}
/// Compile domain policy patterns into a case-insensitive [`GlobSet`].
///
/// Supported domain patterns:
/// - `example.com`: match the exact host
/// - `*.example.com`: match any subdomain (not the apex)
/// - `**.example.com`: match the apex and any subdomain
/// - `*`: match any host
///
/// Each pattern is normalized and expanded into one or more globs; a glob that has
/// already been added is skipped.
///
/// # Errors
///
/// Returns an error when an expanded glob fails to parse or the set cannot be built.
pub(crate) fn compile_globset(patterns: &[String]) -> Result<GlobSet> {
    let mut builder = GlobSetBuilder::new();
    let mut seen: HashSet<String> = HashSet::new();

    let candidates = patterns
        .iter()
        .flat_map(|raw| expand_domain_pattern(&normalize_pattern(raw)));

    for candidate in candidates {
        if seen.contains(&candidate) {
            continue;
        }
        let glob = GlobBuilder::new(&candidate)
            .case_insensitive(true)
            .build()
            .with_context(|| format!("invalid glob pattern: {candidate}"))?;
        builder.add(glob);
        seen.insert(candidate);
    }

    let set = builder.build()?;
    Ok(set)
}
/// A domain policy pattern decoded into its wildcard form.
#[derive(Debug, Clone)]
pub(crate) enum DomainPattern {
    /// The bare `*` pattern.
    Any,
    /// A `**.domain` pattern, carrying the domain that follows the prefix.
    ApexAndSubdomains(String),
    /// A `*.domain` pattern, carrying the domain that follows the prefix.
    SubdomainsOnly(String),
    /// A pattern without a wildcard prefix (empty for blank input).
    Exact(String),
}

impl DomainPattern {
    /// Decode the wildcard prefix of a policy pattern.
    ///
    /// Glob syntax is validated when the globset is built; this only recognizes the
    /// `*`, `**.` and `*.` forms so that constraint checks stay lightweight.
    pub(crate) fn parse(input: &str) -> Self {
        match input.trim() {
            "" => Self::Exact(String::new()),
            "*" => Self::Any,
            pattern => {
                if let Some(rest) = pattern.strip_prefix("**.") {
                    Self::parse_domain(rest, Self::ApexAndSubdomains)
                } else if let Some(rest) = pattern.strip_prefix("*.") {
                    Self::parse_domain(rest, Self::SubdomainsOnly)
                } else {
                    Self::Exact(pattern.to_string())
                }
            }
        }
    }

    /// Decode a policy pattern for constraint comparisons, passing the domain part
    /// through `parse_domain_for_constraints` (which validates it with `url`).
    pub(crate) fn parse_for_constraints(input: &str) -> Self {
        match input.trim() {
            "" => Self::Exact(String::new()),
            "*" => Self::Any,
            pattern => {
                if let Some(rest) = pattern.strip_prefix("**.") {
                    Self::ApexAndSubdomains(parse_domain_for_constraints(rest))
                } else if let Some(rest) = pattern.strip_prefix("*.") {
                    Self::SubdomainsOnly(parse_domain_for_constraints(rest))
                } else {
                    Self::Exact(parse_domain_for_constraints(pattern))
                }
            }
        }
    }

    /// Wrap the trimmed `domain` with `build`, or fall back to an empty `Exact` when
    /// nothing follows the wildcard prefix.
    fn parse_domain(domain: &str, build: impl FnOnce(String) -> Self) -> Self {
        match domain.trim() {
            "" => Self::Exact(String::new()),
            name => build(name.to_string()),
        }
    }

    /// Reports whether every host matched by `candidate` is also matched by `self`.
    pub(crate) fn allows(&self, candidate: &DomainPattern) -> bool {
        match (self, candidate) {
            // `*` covers everything; nothing narrower than `*` can cover `*`.
            (Self::Any, _) => true,
            (_, Self::Any) => false,

            // An exact host only covers the very same host.
            (Self::Exact(own), Self::Exact(other)) => domain_eq(other, own),
            (Self::Exact(_), Self::SubdomainsOnly(_) | Self::ApexAndSubdomains(_)) => false,

            // `*.d` excludes `d` itself, so anything that includes its own apex must sit
            // strictly below `d`.
            (Self::SubdomainsOnly(own), Self::Exact(other) | Self::ApexAndSubdomains(other)) => {
                is_strict_subdomain(other, own)
            }
            (Self::SubdomainsOnly(own), Self::SubdomainsOnly(other)) => {
                is_subdomain_or_equal(other, own)
            }

            // `**.d` covers `d` and everything beneath it.
            (
                Self::ApexAndSubdomains(own),
                Self::Exact(other) | Self::SubdomainsOnly(other) | Self::ApexAndSubdomains(other),
            ) => is_subdomain_or_equal(other, own),
        }
    }
}
fn parse_domain_for_constraints(domain: &str) -> String {
let domain = domain.trim().trim_end_matches('.');
if domain.is_empty() {
return String::new();
}
let host = if domain.starts_with('[') && domain.ends_with(']') {
&domain[1..domain.len().saturating_sub(1)]
} else {
domain
};
if host.contains('*') || host.contains('?') || host.contains('%') {
return domain.to_string();
}
match UrlHost::parse(host) {
Ok(host) => host.to_string(),
Err(_) => String::new(),
}
}
/// Turn one normalized policy pattern into the glob strings that implement it.
///
/// `*` and exact hosts map to a single glob. Subdomain patterns use `?*.<domain>`, and
/// apex-and-subdomain patterns emit the bare domain followed by that subdomain glob.
fn expand_domain_pattern(pattern: &str) -> Vec<String> {
    let subdomain_glob = |domain: &str| format!("?*.{domain}");

    match DomainPattern::parse(pattern) {
        DomainPattern::Any => vec![pattern.to_owned()],
        DomainPattern::Exact(host) => vec![host],
        DomainPattern::SubdomainsOnly(parent) => vec![subdomain_glob(&parent)],
        DomainPattern::ApexAndSubdomains(apex) => {
            let subdomains = subdomain_glob(&apex);
            vec![apex, subdomains]
        }
    }
}
/// Removes every trailing `.` from `domain` and lowercases its ASCII letters.
fn normalize_domain(domain: &str) -> String {
    let without_root_dot = domain.trim_end_matches('.');
    without_root_dot
        .chars()
        .map(|ch| ch.to_ascii_lowercase())
        .collect()
}
/// Reports whether two domains are the same once trailing dots and ASCII case are ignored.
fn domain_eq(left: &str, right: &str) -> bool {
    let lhs = normalize_domain(left);
    let rhs = normalize_domain(right);
    lhs == rhs
}
/// Reports whether `child` equals `parent` or sits below it on a label boundary.
///
/// Both names are normalized (trailing dots removed, ASCII-lowercased) first.
fn is_subdomain_or_equal(child: &str, parent: &str) -> bool {
    let (child, parent) = (normalize_domain(child), normalize_domain(parent));
    match child.strip_suffix(parent.as_str()) {
        // Nothing left means the names are equal; otherwise the remainder must end in the
        // dot that separates the extra labels from `parent`.
        Some(extra_labels) => extra_labels.is_empty() || extra_labels.ends_with('.'),
        None => false,
    }
}
/// Reports whether `child` sits strictly below `parent` on a label boundary.
///
/// Both names are normalized (trailing dots removed, ASCII-lowercased) first; a name is
/// never a strict subdomain of itself.
fn is_strict_subdomain(child: &str, parent: &str) -> bool {
    let (child, parent) = (normalize_domain(child), normalize_domain(parent));
    // A remainder ending in `.` is necessarily non-empty, so equality is excluded.
    child
        .strip_suffix(parent.as_str())
        .is_some_and(|extra_labels| extra_labels.ends_with('.'))
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `input` as a [`Host`] and runs the loopback check on it.
    fn loopback(input: &str) -> bool {
        is_loopback_host(&Host::parse(input).unwrap())
    }

    /// Compiles a single policy pattern into a globset.
    fn globset_for(pattern: &str) -> GlobSet {
        compile_globset(&[pattern.to_string()]).unwrap()
    }

    #[test]
    fn method_allowed_full_allows_everything() {
        for method in ["GET", "POST", "CONNECT"] {
            assert!(NetworkMode::Full.allows_method(method), "{method}");
        }
    }

    #[test]
    fn method_allowed_limited_allows_only_safe_methods() {
        for method in ["GET", "HEAD", "OPTIONS"] {
            assert!(NetworkMode::Limited.allows_method(method), "{method}");
        }
        for method in ["POST", "CONNECT"] {
            assert!(!NetworkMode::Limited.allows_method(method), "{method}");
        }
    }

    #[test]
    fn compile_globset_normalizes_trailing_dots() {
        let set = globset_for("Example.COM.");
        assert!(set.is_match("example.com"));
        assert!(!set.is_match("api.example.com"));
    }

    #[test]
    fn compile_globset_normalizes_wildcards() {
        let set = globset_for("*.Example.COM.");
        assert!(set.is_match("api.example.com"));
        assert!(!set.is_match("example.com"));
    }

    #[test]
    fn compile_globset_normalizes_apex_and_subdomains() {
        let set = globset_for("**.Example.COM.");
        assert!(set.is_match("example.com"));
        assert!(set.is_match("api.example.com"));
    }

    #[test]
    fn compile_globset_normalizes_bracketed_ipv6_literals() {
        let set = globset_for("[::1]");
        assert!(set.is_match("::1"));
    }

    #[test]
    fn is_loopback_host_handles_localhost_variants() {
        for name in ["localhost", "localhost.", "LOCALHOST"] {
            assert!(loopback(name), "{name}");
        }
        assert!(!loopback("notlocalhost"));
    }

    #[test]
    fn is_loopback_host_handles_ip_literals() {
        for literal in ["127.0.0.1", "::1"] {
            assert!(loopback(literal), "{literal}");
        }
        assert!(!loopback("1.2.3.4"));
    }

    #[test]
    fn is_non_public_ip_rejects_private_and_loopback_ranges() {
        let non_public = [
            "127.0.0.1",
            "10.0.0.1",
            "192.168.0.1",
            "100.64.0.1",
            "192.0.0.1",
            "192.0.2.1",
            "198.18.0.1",
            "198.51.100.1",
            "203.0.113.1",
            "240.0.0.1",
            "0.1.2.3",
            "::ffff:127.0.0.1",
            "::ffff:10.0.0.1",
            "::1",
            "fe80::1",
            "fc00::1",
        ];
        for literal in non_public {
            let ip: IpAddr = literal.parse().unwrap();
            assert!(is_non_public_ip(ip), "{literal}");
        }

        let public = ["8.8.8.8", "::ffff:8.8.8.8"];
        for literal in public {
            let ip: IpAddr = literal.parse().unwrap();
            assert!(!is_non_public_ip(ip), "{literal}");
        }
    }

    #[test]
    fn normalize_host_lowercases_and_trims() {
        assert_eq!(normalize_host("  ExAmPlE.CoM  "), "example.com");
    }

    #[test]
    fn normalize_host_strips_port_for_host_port() {
        assert_eq!(normalize_host("example.com:1234"), "example.com");
    }

    #[test]
    fn normalize_host_preserves_unbracketed_ipv6() {
        assert_eq!(normalize_host("2001:db8::1"), "2001:db8::1");
    }

    #[test]
    fn normalize_host_strips_trailing_dot() {
        for input in ["example.com.", "ExAmPlE.CoM."] {
            assert_eq!(normalize_host(input), "example.com");
        }
    }

    #[test]
    fn normalize_host_strips_trailing_dot_with_port() {
        assert_eq!(normalize_host("example.com.:443"), "example.com");
    }

    #[test]
    fn normalize_host_strips_brackets_for_ipv6() {
        for input in ["[::1]", "[::1]:443"] {
            assert_eq!(normalize_host(input), "::1");
        }
    }
}