diff --git a/Cargo.lock b/Cargo.lock index 97ed05c78..d707bfaed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1147,6 +1147,7 @@ dependencies = [ "dataplane-concurrency", "dataplane-dpdk", "dataplane-dpdk-sysroot-helper", + "dataplane-flow-filter", "dataplane-gwname", "dataplane-id", "dataplane-mgmt", @@ -1297,6 +1298,21 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "dataplane-flow-filter" +version = "0.7.0" +dependencies = [ + "dataplane-config", + "dataplane-lpm", + "dataplane-net", + "dataplane-pipeline", + "dataplane-tracectl", + "left-right", + "linkme", + "tracing", + "tracing-test", +] + [[package]] name = "dataplane-flow-info" version = "0.7.0" @@ -1467,6 +1483,7 @@ dependencies = [ "dataplane-args", "dataplane-concurrency", "dataplane-config", + "dataplane-flow-filter", "dataplane-gwname", "dataplane-id", "dataplane-interface-manager", @@ -1476,7 +1493,6 @@ dependencies = [ "dataplane-nat", "dataplane-net", "dataplane-pipeline", - "dataplane-pkt-meta", "dataplane-rekon", "dataplane-routing", "dataplane-stats", @@ -1510,6 +1526,7 @@ dependencies = [ "bolero", "dataplane-concurrency", "dataplane-config", + "dataplane-flow-filter", "dataplane-flow-info", "dataplane-lpm", "dataplane-net", diff --git a/Cargo.toml b/Cargo.toml index 0049318df..ecd5d3a6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "dpdk-sys", "dpdk-sysroot-helper", "errno", + "flow-filter", "flow-info", "gwname", "hardware", @@ -56,6 +57,7 @@ dpdk-sys = { path = "./dpdk-sys", package = "dataplane-dpdk-sys", features = [] dpdk-sysroot-helper = { path = "./dpdk-sysroot-helper", package = "dataplane-dpdk-sysroot-helper", features = [] } dplane-rpc = { git = "https://github.com/githedgehog/dplane-rpc.git", rev = "e8fc33db10e1d00785f2a2b90cbadcad7900f200", features = [] } errno = { path = "./errno", package = "dataplane-errno", features = [] } +flow-filter = { path = "./flow-filter", package = "dataplane-flow-filter", features = [] } flow-info = { path = "./flow-info", package = "dataplane-flow-info", features = [] } gwname = { path = "./gwname", package = "dataplane-gwname", features = [] } hardware = { path = "./hardware", package = "dataplane-hardware", features = [] } diff --git a/config/src/external/overlay/vpcpeering.rs b/config/src/external/overlay/vpcpeering.rs index 31dfc3276..4a704d773 100644 --- a/config/src/external/overlay/vpcpeering.rs +++ b/config/src/external/overlay/vpcpeering.rs @@ -166,6 +166,20 @@ impl VpcExpose { self.ips.insert(prefix); self } + // If the as_range list is empty, then there's no NAT required for the expose, meaning that the + // public IPs are those from the "ips" list. This method extends the list of public prefixes, + // whether it's "ips" or "as_range". + #[must_use] + pub fn insert_public_ip(mut self, prefix: PrefixWithOptionalPorts) -> Self { + if let Some(nat) = self.nat.as_mut() + && !nat.as_range.is_empty() + { + nat.as_range.insert(prefix); + } else { + self.ips.insert(prefix); + } + self + } #[must_use] pub fn not(mut self, prefix: PrefixWithOptionalPorts) -> Self { self.nots.insert(prefix); @@ -207,6 +221,20 @@ impl VpcExpose { &nat.as_range } } + // If the as_range list is empty, then there's no NAT required for the expose, meaning that the + // public IPs are those from the "ips" list. This method returns a mutable reference to the current list of public IPs + // for the VpcExpose. 
+ #[must_use] + pub fn public_ips_mut(&mut self) -> &mut BTreeSet { + let Some(nat) = self.nat.as_mut() else { + return &mut self.ips; + }; + if nat.as_range.is_empty() { + &mut self.ips + } else { + &mut nat.as_range + } + } // Same as public_ips, but returns the list of excluded prefixes #[must_use] pub fn public_excludes(&self) -> &BTreeSet { diff --git a/dataplane/Cargo.toml b/dataplane/Cargo.toml index fe8d430f2..6245182e9 100644 --- a/dataplane/Cargo.toml +++ b/dataplane/Cargo.toml @@ -15,6 +15,7 @@ concurrency = { workspace = true } ctrlc = { workspace = true, features = ["termination"] } dpdk = { workspace = true } dyn-iter = { workspace = true } +flow-filter = { workspace = true } futures = { workspace = true } gwname = { workspace = true } hyper = { workspace = true } diff --git a/dataplane/src/main.rs b/dataplane/src/main.rs index 7d4a8bdad..9aaa8a775 100644 --- a/dataplane/src/main.rs +++ b/dataplane/src/main.rs @@ -154,7 +154,7 @@ fn main() { vpcmapw: setup.vpcmapw, nattablesw: setup.nattablesw, natallocatorw: setup.natallocatorw, - vpcdtablesw: setup.vpcdtablesw, + flowfilterw: setup.flowfiltertablesw, vpc_stats_store: setup.vpc_stats_store, }, }) diff --git a/dataplane/src/packet_processor/mod.rs b/dataplane/src/packet_processor/mod.rs index 1ea6d2bb6..ebfa4aae6 100644 --- a/dataplane/src/packet_processor/mod.rs +++ b/dataplane/src/packet_processor/mod.rs @@ -12,7 +12,7 @@ use super::packet_processor::ipforward::IpForwarder; use concurrency::sync::Arc; -use pkt_meta::dst_vpcd_lookup::{DstVpcdLookup, VpcDiscTablesWriter}; +use flow_filter::{FlowFilter, FlowFilterTableWriter}; use pkt_meta::flow_table::{ExpirationsNF, FlowTable, LookupNF}; use nat::stateful::NatAllocatorWriter; @@ -38,7 +38,7 @@ where pub vpcmapw: VpcMapWriter, pub nattablesw: NatTablesWriter, pub natallocatorw: NatAllocatorWriter, - pub vpcdtablesw: VpcDiscTablesWriter, + pub flowfiltertablesw: FlowFilterTableWriter, pub stats: StatsCollector, pub vpc_stats_store: Arc, } @@ -49,7 +49,7 @@ pub(crate) fn start_router( ) -> Result, RouterError> { let nattablesw = NatTablesWriter::new(); let natallocatorw = NatAllocatorWriter::new(); - let vpcdtablesw = VpcDiscTablesWriter::new(); + let flowfiltertablesw = FlowFilterTableWriter::new(); let router = Router::new(params)?; let vpcmapw = VpcMapWriter::::new(); @@ -65,7 +65,7 @@ pub(crate) fn start_router( let iftr_factory = router.get_iftabler_factory(); let fibtr_factory = router.get_fibtr_factory(); - let vpcdtablesr_factory = vpcdtablesw.get_reader_factory(); + let flowfiltertablesr_factory = flowfiltertablesw.get_reader_factory(); let atabler_factory = router.get_atabler_factory(); let nattabler_factory = nattablesw.get_reader_factory(); let natallocator_factory = natallocatorw.get_reader_factory(); @@ -74,7 +74,6 @@ pub(crate) fn start_router( // Build network functions let stage_ingress = Ingress::new("Ingress", iftr_factory.handle()); let stage_egress = Egress::new("Egress", iftr_factory.handle(), atabler_factory.handle()); - let dst_vpcd_lookup = DstVpcdLookup::new("dst-vni-lookup", vpcdtablesr_factory.handle()); let iprouter1 = IpForwarder::new("IP-Forward-1", fibtr_factory.handle()); let iprouter2 = IpForwarder::new("IP-Forward-2", fibtr_factory.handle()); let stateless_nat = StatelessNat::with_reader("stateless-NAT", nattabler_factory.handle()); @@ -87,14 +86,15 @@ pub(crate) fn start_router( let stats_stage = Stats::new("stats", writer.clone()); let flow_lookup_nf = LookupNF::new("flow-lookup", flow_table.clone()); let flow_expirations_nf = 
ExpirationsNF::new(flow_table.clone());
+    let flow_filter = FlowFilter::new("flow-filter", flowfiltertablesr_factory.handle());
 
     // Build the pipeline for a router. The composition of the pipeline (in stages) is currently
     // hard-coded. In any pipeline, the Stats and ExpirationsNF stages should go last
     DynPipeline::new()
         .add_stage(stage_ingress)
         .add_stage(iprouter1)
-        .add_stage(dst_vpcd_lookup)
         .add_stage(flow_lookup_nf)
+        .add_stage(flow_filter)
         .add_stage(stateless_nat)
         .add_stage(stateful_nat)
         .add_stage(iprouter2)
@@ -110,7 +110,7 @@
         vpcmapw,
         nattablesw,
         natallocatorw,
-        vpcdtablesw,
+        flowfiltertablesw,
         stats,
         vpc_stats_store,
     })
diff --git a/flow-filter/Cargo.toml b/flow-filter/Cargo.toml
new file mode 100644
index 000000000..f20f43c42
--- /dev/null
+++ b/flow-filter/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "dataplane-flow-filter"
+edition.workspace = true
+license.workspace = true
+publish.workspace = true
+version.workspace = true
+
+[dependencies]
+config = { workspace = true }
+left-right = { workspace = true }
+linkme = { workspace = true }
+lpm = { workspace = true }
+net = { workspace = true }
+pipeline = { workspace = true }
+tracectl = { workspace = true }
+tracing = { workspace = true }
+
+[dev-dependencies]
+lpm = { workspace = true, features = ["testing"] }
+tracing-test = { workspace = true, features = [] }
diff --git a/flow-filter/src/filter_rw.rs b/flow-filter/src/filter_rw.rs
new file mode 100644
index 000000000..f6efa7a36
--- /dev/null
+++ b/flow-filter/src/filter_rw.rs
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Left-right integration for [`FlowFilterTable`]
+
+use crate::tables::FlowFilterTable;
+use left_right::{Absorb, ReadGuard, ReadHandle, ReadHandleFactory, WriteHandle, new_from_empty};
+use tracing::debug;
+
+#[derive(Debug)]
+pub(crate) enum FlowFilterTableChange {
+    UpdateFlowFilterTable(FlowFilterTable),
+}
+
+impl Absorb<FlowFilterTableChange> for FlowFilterTable {
+    fn absorb_first(&mut self, change: &mut FlowFilterTableChange, _: &Self) {
+        match change {
+            FlowFilterTableChange::UpdateFlowFilterTable(table) => {
+                *self = table.clone();
+            }
+        }
+    }
+    fn drop_first(self: Box<Self>) {}
+    fn sync_with(&mut self, first: &Self) {
+        *self = first.clone();
+    }
+}
+
+#[derive(Debug)]
+pub struct FlowFilterTableReader(ReadHandle<FlowFilterTable>);
+
+impl FlowFilterTableReader {
+    pub(crate) fn enter(&self) -> Option<ReadGuard<'_, FlowFilterTable>> {
+        self.0.enter()
+    }
+
+    #[must_use]
+    pub fn factory(&self) -> FlowFilterTableReaderFactory {
+        FlowFilterTableReaderFactory(self.0.factory())
+    }
+}
+
+#[derive(Debug)]
+pub struct FlowFilterTableReaderFactory(ReadHandleFactory<FlowFilterTable>);
+
+impl FlowFilterTableReaderFactory {
+    #[must_use]
+    pub fn handle(&self) -> FlowFilterTableReader {
+        FlowFilterTableReader(self.0.handle())
+    }
+}
+
+#[derive(Debug)]
+pub struct FlowFilterTableWriter(WriteHandle<FlowFilterTable, FlowFilterTableChange>);
+
+impl FlowFilterTableWriter {
+    #[must_use]
+    #[allow(clippy::new_without_default)]
+    pub fn new() -> FlowFilterTableWriter {
+        let (w, _r) =
+            new_from_empty::<FlowFilterTable, FlowFilterTableChange>(FlowFilterTable::new());
+        FlowFilterTableWriter(w)
+    }
+
+    #[must_use]
+    pub fn get_reader(&self) -> FlowFilterTableReader {
+        FlowFilterTableReader(self.0.clone())
+    }
+
+    pub fn get_reader_factory(&self) -> FlowFilterTableReaderFactory {
+        self.get_reader().factory()
+    }
+
+    pub fn update_flow_filter_table(&mut self, table: FlowFilterTable) {
+        self.0
+            .append(FlowFilterTableChange::UpdateFlowFilterTable(table));
+        self.0.publish();
+        debug!("Updated flow filter table");
+    }
+}
diff --git a/flow-filter/src/lib.rs b/flow-filter/src/lib.rs
new file mode 100644
index 000000000..12fc70117
--- /dev/null
+++ b/flow-filter/src/lib.rs
@@ -0,0 +1,662 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Open Network Fabric Authors
+
+//! Flow-filter pipeline stage
+//!
+//! [`FlowFilter`] is a pipeline stage serving two purposes:
+//!
+//! - It retrieves the destination VPC discriminant for the packet, when possible, and attaches it
+//!   to packet metadata.
+//!
+//! - It validates that the packet is associated with an existing peering connection, as defined in
+//!   the user-provided configuration. Packets that do not have a source IP, port and destination
+//!   IP, port corresponding to existing, valid connections between the prefixes in exposed lists of
+//!   peerings, get dropped.
+
+use net::buffer::PacketBufferMut;
+use net::headers::{TryIp, TryTransport};
+use net::packet::{DoneReason, Packet};
+use pipeline::NetworkFunction;
+use std::net::IpAddr;
+use std::num::NonZero;
+use tracing::{debug, error};
+
+mod filter_rw;
+mod setup;
+mod tables;
+
+pub use filter_rw::{FlowFilterTableReader, FlowFilterTableReaderFactory, FlowFilterTableWriter};
+pub use tables::FlowFilterTable;
+
+use tracectl::trace_target;
+
+use crate::tables::VpcdLookupResult;
+trace_target!("flow-filter", LevelFilter::INFO, &["pipeline"]);
+
+/// A structure to implement the flow-filter pipeline stage.
+pub struct FlowFilter {
+    name: String,
+    tablesr: FlowFilterTableReader,
+}
+
+impl FlowFilter {
+    /// Create a new [`FlowFilter`] instance.
+    pub fn new(name: &str, tablesr: FlowFilterTableReader) -> Self {
+        Self {
+            name: name.to_string(),
+            tablesr,
+        }
+    }
+
+    /// Process a packet.
+    fn process_packet<Buf: PacketBufferMut>(
+        &self,
+        tablesr: &left_right::ReadGuard<'_, FlowFilterTable>,
+        packet: &mut Packet<Buf>,
+    ) {
+        let nfi = &self.name;
+
+        let Some(net) = packet.try_ip() else {
+            debug!("{nfi}: No IP headers found, dropping packet");
+            packet.done(DoneReason::NotIp);
+            return;
+        };
+
+        let Some(src_vpcd) = packet.meta.src_vpcd else {
+            debug!("{nfi}: Missing source VPC discriminant, dropping packet");
+            packet.done(DoneReason::Unroutable);
+            return;
+        };
+
+        let src_ip = net.src_addr();
+        let dst_ip = net.dst_addr();
+        let ports = packet.try_transport().and_then(|t| {
+            t.src_port()
+                .map(NonZero::get)
+                .zip(t.dst_port().map(NonZero::get))
+        });
+        let log_str = format_packet_addrs_ports(&src_ip, &dst_ip, ports);
+
+        let Some(dst_vpcd_lookup_res) = tablesr.lookup(src_vpcd, &src_ip, &dst_ip, ports) else {
+            debug!("{nfi}: Flow not allowed, dropping packet: {log_str}");
+            packet.done(DoneReason::Filtered);
+            return;
+        };
+
+        match dst_vpcd_lookup_res {
+            VpcdLookupResult::Single(dst_vpcd) => {
+                debug!("{nfi}: Set packet dst_vpcd to {dst_vpcd}: {log_str}");
+                packet.meta.dst_vpcd = Some(dst_vpcd);
+            }
+            VpcdLookupResult::MultipleMatches => {
+                debug!(
+                    "{nfi}: Ambiguous dst_vpcd for {dst_ip} in src_vpcd {src_vpcd}: falling back to flow table lookup to see if a session exists"
+                );
+            }
+        }
+
+        debug!("{nfi}: Flow allowed: {log_str}");
+    }
+}
+
+impl<Buf: PacketBufferMut> NetworkFunction<Buf> for FlowFilter {
+    fn process<'a, Input: Iterator<Item = Packet<Buf>> + 'a>(
+        &'a mut self,
+        input: Input,
+    ) -> impl Iterator<Item = Packet<Buf>> + 'a {
+        input.filter_map(|mut packet| {
+            if let Some(tablesr) = &self.tablesr.enter() {
+                if !packet.is_done() {
+                    self.process_packet(tablesr, &mut packet);
+                }
+            } else {
+                error!("{}: failed to read flow filter table", self.name);
+                packet.done(DoneReason::InternalFailure);
+            }
+            packet.enforce()
+        })
+    }
+}
+
+fn format_packet_addrs_ports(
src_addr: &IpAddr, + dst_addr: &IpAddr, + ports: Option<(u16, u16)>, +) -> String { + format!( + "src={src_addr}{}, dst={dst_addr}{}", + ports.map_or(String::new(), |p| format!(":{}", p.0)), + ports.map_or(String::new(), |p| format!(":{}", p.1)) + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::filter_rw::FlowFilterTableWriter; + use crate::tables::OptionalPortRange; + use config::external::overlay::Overlay; + use config::external::overlay::vpc::{Vpc, VpcTable}; + use config::external::overlay::vpcpeering::{ + VpcExpose, VpcManifest, VpcPeering, VpcPeeringTable, + }; + use lpm::prefix::Prefix; + use net::buffer::TestBuffer; + use net::headers::{Net, TryHeadersMut, TryIpMut}; + use net::ipv4::addr::UnicastIpv4Addr; + use net::ipv6::addr::UnicastIpv6Addr; + use net::packet::test_utils::{build_test_ipv4_packet, build_test_ipv6_packet}; + use net::packet::{DoneReason, Packet, VpcDiscriminant}; + use net::vxlan::Vni; + + fn vpcd(vni: u32) -> VpcDiscriminant { + VpcDiscriminant::VNI(Vni::new_checked(vni).unwrap()) + } + + fn set_src_addr(packet: &mut Packet, addr: IpAddr) { + let net = packet.headers_mut().try_ip_mut().unwrap(); + match net { + Net::Ipv4(ip) => { + ip.set_source(UnicastIpv4Addr::try_from(addr).unwrap()); + } + Net::Ipv6(ip) => { + ip.set_source(UnicastIpv6Addr::try_from(addr).unwrap()); + } + } + } + + fn set_dst_addr(packet: &mut Packet, addr: IpAddr) { + let net = packet.headers_mut().try_ip_mut().unwrap(); + match net { + Net::Ipv4(ip) => { + ip.set_destination(UnicastIpv4Addr::try_from(addr).unwrap().into()); + } + Net::Ipv6(ip) => { + ip.set_destination(UnicastIpv6Addr::try_from(addr).unwrap().into()); + } + } + } + + fn create_test_packet( + src_vpcd: Option, + src_addr: IpAddr, + dst_addr: IpAddr, + ) -> Packet { + let mut packet = match dst_addr { + IpAddr::V4(_) => build_test_ipv4_packet(100).unwrap(), + IpAddr::V6(_) => build_test_ipv6_packet(100).unwrap(), + }; + set_src_addr(&mut packet, src_addr); + set_dst_addr(&mut packet, dst_addr); + packet.meta.src_vpcd = src_vpcd.map(VpcDiscriminant::VNI); + packet + } + + #[test] + fn test_flow_filter_packet_allowed() { + // Setup table + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd = vpcd(200); + + table + .insert( + src_vpcd, + VpcdLookupResult::Single(dst_vpcd), + Prefix::from("10.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("20.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + let mut writer = FlowFilterTableWriter::new(); + writer.update_flow_filter_table(table); + + let mut flow_filter = FlowFilter::new("test-filter", writer.get_reader()); + + // Create test packet + let packet = create_test_packet( + Some(Vni::new_checked(100).unwrap()), + "10.0.0.5".parse().unwrap(), + "20.0.0.10".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert!(!packets[0].is_done()); + assert_eq!(packets[0].meta.dst_vpcd, Some(dst_vpcd)); + } + + #[test] + fn test_flow_filter_packet_filtered() { + // Setup table + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd = vpcd(200); + + table + .insert( + src_vpcd, + VpcdLookupResult::Single(dst_vpcd), + Prefix::from("10.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("20.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + let mut writer = FlowFilterTableWriter::new(); + writer.update_flow_filter_table(table); + + 
let mut flow_filter = FlowFilter::new("test-filter", writer.get_reader()); + + // Create test packet with non-matching destination + let packet = create_test_packet( + Some(Vni::new_checked(100).unwrap()), + "10.0.0.5".parse().unwrap(), + "30.0.0.10".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert_eq!(packets[0].get_done(), Some(DoneReason::Filtered)); + } + + #[test] + fn test_flow_filter_missing_src_vpcd() { + let table = FlowFilterTable::new(); + let mut writer = FlowFilterTableWriter::new(); + writer.update_flow_filter_table(table); + + let mut flow_filter = FlowFilter::new("test-filter", writer.get_reader()); + + // Create test packet without src_vpcd + let packet = create_test_packet( + None, + "10.0.0.5".parse().unwrap(), + "20.0.0.10".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert_eq!(packets[0].get_done(), Some(DoneReason::Unroutable)); + } + + #[test] + fn test_flow_filter_no_matching_src_prefix() { + // Setup table + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd = vpcd(200); + + table + .insert( + src_vpcd, + VpcdLookupResult::Single(dst_vpcd), + Prefix::from("10.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("20.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + let mut writer = FlowFilterTableWriter::new(); + writer.update_flow_filter_table(table); + + let mut flow_filter = FlowFilter::new("test-filter", writer.get_reader()); + + // Create test packet with non-matching source address + let packet = create_test_packet( + Some(Vni::new_checked(100).unwrap()), + "11.0.0.5".parse().unwrap(), + "20.0.0.10".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert_eq!(packets[0].get_done(), Some(DoneReason::Filtered)); + } + + #[test] + fn test_flow_filter_multiple_matches_no_dst_vpcd() { + // Setup table with overlapping destination prefixes from different VPCs + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + + // Manually set up a scenario where dst_vpcd lookup returns MultipleMatches + // This happens when the same destination can be reached from multiple VPCs + table + .insert( + src_vpcd, + VpcdLookupResult::MultipleMatches, + Prefix::from("10.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("20.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + let mut writer = FlowFilterTableWriter::new(); + writer.update_flow_filter_table(table); + + let mut flow_filter = FlowFilter::new("test-filter", writer.get_reader()); + + // Create test packet + let packet = create_test_packet( + Some(Vni::new_checked(100).unwrap()), + "10.0.0.5".parse().unwrap(), + "20.0.0.10".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert!(!packets[0].is_done()); + assert!(packets[0].meta.dst_vpcd.is_none()); + } + + fn do_overlay_stuff(overlay: &mut Overlay) { + // This is usually part of the overlay validation process when validating the config, + // but we don't go through the full config creation here. + // See Overlay::validate. 
+ use std::collections::BTreeMap; + + let id_map = overlay + .vpc_table + .values() + .map(|vpc| (vpc.name.clone(), vpc.id.clone())) + .collect::>(); + + overlay + .vpc_table + .collect_peerings(&overlay.peering_table, &id_map); + } + + #[test] + fn test_flow_filter_table_overlap_cases() { + let vni1 = Vni::new_checked(100).unwrap(); + let vni2 = Vni::new_checked(200).unwrap(); + let vni3 = Vni::new_checked(300).unwrap(); + + let mut vpc_table = VpcTable::new(); + vpc_table + .add(Vpc::new("vpc1", "VPC01", vni1.as_u32()).unwrap()) + .unwrap(); + vpc_table + .add(Vpc::new("vpc2", "VPC02", vni2.as_u32()).unwrap()) + .unwrap(); + vpc_table + .add(Vpc::new("vpc3", "VPC03", vni3.as_u32()).unwrap()) + .unwrap(); + + // - vpc1-to-vpc2: + // VPC01: + // prefixes: + // - 1.0.0.0/24 + // VPC02: + // prefixes: + // - 5.0.0.0/24 + // + // - vpc2-to-vpc3: + // VPC02: + // prefixes: + // - 5.0.0.0/24 + // - 6.0.0.0/24 + // VPC03: + // prefixes: + // - 1.0.0.64/26 // 1.0.0.64 to 1.0.0.127 + let mut peering_table = VpcPeeringTable::new(); + peering_table + .add(VpcPeering::new( + "vpc1-to-vpc2", + VpcManifest { + name: "vpc1".to_string(), + exposes: vec![VpcExpose::empty().ip("1.0.0.0/24".into())], + }, + VpcManifest { + name: "vpc2".to_string(), + exposes: vec![VpcExpose::empty().ip("5.0.0.0/24".into())], + }, + None, + )) + .unwrap(); + + peering_table + .add(VpcPeering::new( + "vpc2-to-vpc3", + VpcManifest { + name: "vpc2".to_string(), + exposes: vec![ + VpcExpose::empty().ip("5.0.0.0/24".into()), + VpcExpose::empty().ip("6.0.0.0/24".into()), + ], + }, + VpcManifest { + name: "vpc3".to_string(), + exposes: vec![VpcExpose::empty().ip("1.0.0.64/26".into())], + }, + None, + )) + .unwrap(); + + let mut overlay = Overlay::new(vpc_table, peering_table); + do_overlay_stuff(&mut overlay); + + let table = FlowFilterTable::build_from_overlay(&overlay).unwrap(); + + let mut writer = FlowFilterTableWriter::new(); + writer.update_flow_filter_table(table); + + let mut flow_filter = FlowFilter::new("test-filter", writer.get_reader()); + + // Test with packets + + // VPC-1 -> VPC-2: No ambiguity + let packet = create_test_packet( + Some(Vni::new_checked(100).unwrap()), + "1.0.0.5".parse().unwrap(), + "5.0.0.10".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert!(!packets[0].is_done(), "{:?}", packets[0].get_done()); + assert_eq!(packets[0].meta.dst_vpcd, Some(vpcd(vni2.into()))); + + // VPC-3 -> VPC-2: No ambiguity + let packet = create_test_packet( + Some(Vni::new_checked(300).unwrap()), + "1.0.0.70".parse().unwrap(), + "5.0.0.10".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert!(!packets[0].is_done(), "{:?}", packets[0].get_done()); + assert_eq!(packets[0].meta.dst_vpcd, Some(vpcd(vni2.into()))); + + // VPC-2 -> VPC-1 using lower non-overlapping destination prefix section + let packet = create_test_packet( + Some(Vni::new_checked(200).unwrap()), + "5.0.0.10".parse().unwrap(), + "1.0.0.5".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert!(!packets[0].is_done(), "{:?}", packets[0].get_done()); + assert_eq!(packets[0].meta.dst_vpcd, Some(vpcd(vni1.into()))); + + // VPC-2 -> VPC-1 using upper non-overlapping destination prefix section + let packet = create_test_packet( + Some(Vni::new_checked(200).unwrap()), + 
"5.0.0.10".parse().unwrap(), + "1.0.0.205".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert!(!packets[0].is_done(), "{:?}", packets[0].get_done()); + assert_eq!(packets[0].meta.dst_vpcd, Some(vpcd(vni1.into()))); + + // VPC-2 -> VPC-3 using non-overlapping source prefix + let packet = create_test_packet( + Some(Vni::new_checked(200).unwrap()), + "6.0.0.11".parse().unwrap(), + "1.0.0.70".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert!(!packets[0].is_done(), "{:?}", packets[0].get_done()); + assert_eq!(packets[0].meta.dst_vpcd, Some(vpcd(vni3.into()))); + + // VPC-2 -> VPC-??? using overlapping prefix sections: multiple matches + let packet = create_test_packet( + Some(Vni::new_checked(200).unwrap()), + "5.0.0.10".parse().unwrap(), + "1.0.0.70".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert!(!packets[0].is_done(), "{:?}", packets[0].get_done()); + assert_eq!(packets[0].meta.dst_vpcd, None) + } + + #[test] + fn test_flow_filter_ipv6() { + // Setup table + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd = vpcd(200); + + table + .insert( + src_vpcd, + VpcdLookupResult::Single(dst_vpcd), + Prefix::from("2001:db8::/32"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("2001:db9::/32"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + let mut writer = FlowFilterTableWriter::new(); + writer.update_flow_filter_table(table); + + let mut flow_filter = FlowFilter::new("test-filter", writer.get_reader()); + + // Create test packet + let packet = create_test_packet( + Some(Vni::new_checked(100).unwrap()), + "2001:db8::1".parse().unwrap(), + "2001:db9::1".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 1); + assert!(!packets[0].is_done()); + assert_eq!(packets[0].meta.dst_vpcd, Some(dst_vpcd)); + } + + #[test] + fn test_flow_filter_batch_processing() { + // Setup table + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd = vpcd(200); + + table + .insert( + src_vpcd, + VpcdLookupResult::Single(dst_vpcd), + Prefix::from("10.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("20.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + let mut writer = FlowFilterTableWriter::new(); + writer.update_flow_filter_table(table); + + let mut flow_filter = FlowFilter::new("test-filter", writer.get_reader()); + + // Create multiple test packets + let packet1 = create_test_packet( + Some(Vni::new_checked(100).unwrap()), + "10.0.0.5".parse().unwrap(), + "20.0.0.10".parse().unwrap(), + ); + let packet2 = create_test_packet( + Some(Vni::new_checked(100).unwrap()), + "10.0.0.6".parse().unwrap(), + "30.0.0.10".parse().unwrap(), // Should be filtered + ); + let packet3 = create_test_packet( + Some(Vni::new_checked(100).unwrap()), + "10.0.0.7".parse().unwrap(), + "20.0.0.20".parse().unwrap(), + ); + + let packets = flow_filter + .process([packet1, packet2, packet3].into_iter()) + .collect::>(); + + assert_eq!(packets.len(), 3); + assert!(!packets[0].is_done()); + assert_eq!(packets[0].meta.dst_vpcd, Some(dst_vpcd)); + assert_eq!(packets[1].get_done(), Some(DoneReason::Filtered)); + 
assert!(!packets[2].is_done()); + assert_eq!(packets[2].meta.dst_vpcd, Some(dst_vpcd)); + } + + #[test] + fn test_format_packet_addrs_ports() { + let src_addr = "10.0.0.1".parse().unwrap(); + let dst_addr = "20.0.0.2".parse().unwrap(); + + let result = format_packet_addrs_ports(&src_addr, &dst_addr, Some((8080, 443))); + assert_eq!(result, "src=10.0.0.1:8080, dst=20.0.0.2:443"); + + let result_no_ports = format_packet_addrs_ports(&src_addr, &dst_addr, None); + assert_eq!(result_no_ports, "src=10.0.0.1, dst=20.0.0.2"); + } +} diff --git a/flow-filter/src/setup.rs b/flow-filter/src/setup.rs new file mode 100644 index 000000000..d40f32900 --- /dev/null +++ b/flow-filter/src/setup.rs @@ -0,0 +1,864 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +use crate::FlowFilterTable; +use crate::tables::VpcdLookupResult; +use config::ConfigError; +use config::external::overlay::Overlay; +use config::external::overlay::vpc::{Peering, Vpc}; +use config::external::overlay::vpcpeering::{VpcExpose, VpcManifest}; +use config::internal::interfaces::interface::InterfaceConfigTable; +use config::utils::{ConfigUtilError, collapse_prefixes_peering}; +use lpm::prefix::{IpRangeWithPorts, PrefixWithOptionalPorts}; +use net::packet::VpcDiscriminant; +use std::collections::{BTreeMap, BTreeSet, HashSet}; + +impl FlowFilterTable { + /// Build a [`FlowFilterTable`] from an overlay + pub fn build_from_overlay(overlay: &Overlay) -> Result { + let clean_vpc_table = cleanup_vpc_table(overlay.vpc_table.values().collect())?; + let mut table = FlowFilterTable::new(); + + for vpc in &clean_vpc_table { + for peering in &vpc.peerings { + table.process_peering(overlay, vpc, peering)?; + } + } + Ok(table) + } + + // When processing a peering, we split prefixes when they have partial overlapping with prefixes + // from other peerings for the same VPC + // + // For example: + // + // - VPC A is peered with VPC B and C + // - VPC B exposes 10.0.0.0/24 and 20.0.0.128/25 + // - VPC C exposes 10.0.0.0/25 and 20.0.0.0/24 + // + // When packet A sends to 10.0.0.1, we don't know whether the destination VPC is B or C. + // However, if A sends to 10.0.0.200, we know that the destination is in VPC B. + // + // To account for the non-overlapping section of the prefixes, we split the prefix exposed by + // the remote end of the peering: for A's peering with B, the remote end becomes {10.0.0.0/25, + // 10.0.0.128/25, 20.0.0.128/25}. This way, when we do the destination VPC lookup, we can tell + // that the result is ambiguous if we get a match on 10.0.0.0/25, but we can find a unique + // answer if we get a match on 10.0.0.128/25. + // + // Similarly, for A's peering with C, the remote ends of the peering becomes {10.0.0.0/25, + // 20.0.0.0/25, 20.0.0.128/25}. 
+ fn process_peering( + &mut self, + overlay: &Overlay, + vpc: &Vpc, + peering: &Peering, + ) -> Result<(), ConfigError> { + let local_vpcd = VpcDiscriminant::VNI(vpc.vni); + + // Compute lists of overlapping prefixes: + // - between prefixes from remote manifest and prefixes from remote manifests for other peerings + // - between prefixes from local manifest and prefixes from local manifests for other peerings + let mut local_manifests_overlap = BTreeMap::new(); + let mut remote_manifests_overlap = BTreeMap::new(); + for other_peering in &vpc.peerings { + if other_peering.name == peering.name { + // Don't compare peering with itself + continue; + } + let remote_vpcd = VpcDiscriminant::VNI(overlay.vpc_table.get_remote_vni(other_peering)); + // Get overlap for prefixes related to source address + let local_overlap = get_manifest_ips_overlap( + &peering.local, + &local_vpcd, + &other_peering.local, + &remote_vpcd, + |expose| &expose.ips, + ); + // Get overlap for prefixes related to destination address + let remote_overlap = get_manifest_ips_overlap( + &peering.remote, + &local_vpcd, + &other_peering.remote, + &remote_vpcd, + |expose| expose.public_ips(), + ); + + if local_overlap.is_empty() || remote_overlap.is_empty() { + // If either side has no overlap, we'll be able to tell which is the destination VPC + // by looking at both the source and destination prefixes for the packet, so there's + // no need to account for any overlap + continue; + } + + // If there's overlap for both source and destination, we'll need to split prefixes to + // separate the overlapping sections, so we can determine the destination VPC for + // non-overlapping sections + local_manifests_overlap.extend(local_overlap); + remote_manifests_overlap.extend(remote_overlap); + } + + let dst_vpcd = VpcDiscriminant::VNI(overlay.vpc_table.get_remote_vni(peering)); + + // Get list of local prefixes, splitting to account for overlapping, if necessary + let overlap_trie = consolidate_overlap_list(local_manifests_overlap); + let local_prefixes = get_split_prefixes_for_manifest( + &peering.local, + &dst_vpcd, + |expose| &expose.ips, + overlap_trie, + ); + + // Get list of remote prefixes, splitting to account for overlapping, if necessary + let overlap_trie = consolidate_overlap_list(remote_manifests_overlap); + let remote_prefixes = get_split_prefixes_for_manifest( + &peering.remote, + &dst_vpcd, + |expose| expose.public_ips(), + overlap_trie, + ); + + // For each local prefix, add one entry for each associated remote prefix + for (local_prefix, local_vpcd_result) in &local_prefixes { + for (remote_prefix, remote_vpcd_result) in &remote_prefixes { + let remote_vpcd_to_use = match (remote_vpcd_result, local_vpcd_result) { + (VpcdLookupResult::MultipleMatches, VpcdLookupResult::Single(_)) => { + // If the remote prefix is ambiguous but we are able to tell what + // destination VPC to use based on the local prefix in use, do so + local_vpcd_result.clone() + } + _ => remote_vpcd_result.clone(), + }; + self.insert( + local_vpcd, + remote_vpcd_to_use, + local_prefix.prefix(), + local_prefix.ports().into(), + remote_prefix.prefix(), + remote_prefix.ports().into(), + )?; + } + } + Ok(()) + } +} + +fn cleanup_vpc_table(vpcs: Vec<&Vpc>) -> Result, ConfigError> { + let mut new_set = Vec::new(); + for vpc in vpcs { + let mut new_vpc = clone_skipping_peerings(vpc); + + for peering in &vpc.peerings { + // "Collapse" prefixes to get rid of exclusion prefixes + let collapsed_peering = collapse_prefixes_peering(peering).map_err(|e| match e { + 
ConfigUtilError::SplitPrefixError(prefix) => { + ConfigError::FailureApply(format!("Failed to split prefix: {prefix}")) + } + })?; + new_vpc.peerings.push(collapsed_peering); + } + new_set.push(new_vpc); + } + Ok(new_set) +} + +fn clone_skipping_peerings(vpc: &Vpc) -> Vpc { + Vpc { + name: vpc.name.clone(), + id: vpc.id.clone(), + vni: vpc.vni, + interfaces: InterfaceConfigTable::default(), + peerings: vec![], + } +} + +// Return the list of overlapping prefix sections between the sets of exposed prefixes of two +// manifests +// +// For example: +// +// - first manifest exposes 1.0.0.0/24 and 2.0.0.128/25 +// - second manifest exposes 1.0.0.0/23, 2.0.0.0/24, and 3.0.0.0/8 +// - the function returns [1.0.0.0/24, 2.0.0.128/25] and associated VPC discriminants +fn get_manifest_ips_overlap( + manifest_left: &VpcManifest, + vcpd_left: &VpcDiscriminant, + manifest_right: &VpcManifest, + vcpd_right: &VpcDiscriminant, + get_ips: fn(&VpcExpose) -> &BTreeSet, +) -> BTreeMap> { + let mut overlap = BTreeMap::new(); + for prefix_left in manifest_left + .exposes + .iter() + .flat_map(|expose| get_ips(expose).iter()) + { + for prefix_right in manifest_right + .exposes + .iter() + .flat_map(|expose| get_ips(expose).iter()) + { + if let Some(intersection) = prefix_left.intersection(prefix_right) { + let vpcds = HashSet::from([*vcpd_left, *vcpd_right]); + overlap.insert(intersection, vpcds); + } + } + } + overlap +} + +// Consolidate overlapping prefixes, by merging adjacent prefixes when possible +// This is to avoid splitting prefixes for a peering more than necessary +fn consolidate_overlap_list( + mut overlap: BTreeMap>, +) -> BTreeMap> { + let mut consolidated_overlap = BTreeMap::new(); + while let Some((first_prefix, first_vpcds)) = overlap.pop_first() { + let Some((&second_prefix, second_vpcds)) = overlap.first_key_value() else { + // We've reached the end of the list, just insert the last item we popped + consolidated_overlap.insert(first_prefix, first_vpcds.clone()); + break; + }; + if let Some(merged_prefix) = first_prefix.merge(&second_prefix) { + let merged_set = first_vpcds.union(second_vpcds).cloned().collect(); + overlap.remove(&second_prefix); + overlap.insert(merged_prefix, merged_set); + continue; + } + consolidated_overlap.insert(first_prefix, first_vpcds.clone()); + } + consolidated_overlap +} + +// Return all exposed prefixes for a manifest, split such that there is no partial overlapping with +// manifests for other peerings. 
+// +// For example: +// +// - VPC A is peered with VPC B and C +// - VPC B exposes 10.0.0.0/24 +// - VPC C exposes 10.0.0.0/25 +// +// Then the prefixes in the remote manifests for VPC A's peerings will be: +// +// - For VPC B: [10.0.0.0/25, 10.0.0.128/25] (split so that 10.0.0.0/24 does not overlap partially +// with VPC C's 10.0.0.0/25) +// - For VPC C: [10.0.0.0/25] +fn get_split_prefixes_for_manifest( + manifest: &VpcManifest, + vpcd: &VpcDiscriminant, + get_ips: fn(&VpcExpose) -> &BTreeSet, + overlaps: BTreeMap>, +) -> Vec<(PrefixWithOptionalPorts, VpcdLookupResult)> { + let mut prefixes_with_vpcd = Vec::new(); + 'next_prefix: for prefix in manifest + .exposes + .iter() + .flat_map(|expose| get_ips(expose).iter()) + { + for (overlap_prefix, _overlap_vpcds) in overlaps.iter() { + if overlap_prefix.covers(prefix) { + // The overlap prefix covers the current prefix, so we know the current prefix is + // overlapping and is associated to multiple matches for the destination VPC lookup + prefixes_with_vpcd.push((*prefix, VpcdLookupResult::MultipleMatches)); + continue 'next_prefix; + } else if prefix.covers(overlap_prefix) { + // The current prefix partially overlaps with some other prefixes (of which + // overlap_prefix is the union of all intersections with the current prefix), so we + // need to split the current prefix into parts that don't have partial overlap with + // the other prefixes + prefixes_with_vpcd.extend( + split_overlapping(prefix, overlap_prefix) + .into_iter() + .map(|p| { + ( + p, + if p == *overlap_prefix { + // Multiple destination VPC matches for the overlapping section + VpcdLookupResult::MultipleMatches + } else { + // Single destination VPC match for the other sections + VpcdLookupResult::Single(*vpcd) + }, + ) + }), + ); + continue 'next_prefix; + } + } + // We found no overlap, just add the prefix with the single associated destination VPC + prefixes_with_vpcd.push((*prefix, VpcdLookupResult::Single(*vpcd))); + } + prefixes_with_vpcd +} + +// Split a prefix into the given subprefix, and the difference +fn split_overlapping( + prefix_to_split: &PrefixWithOptionalPorts, + mask_prefix: &PrefixWithOptionalPorts, +) -> Vec { + debug_assert!(prefix_to_split.overlaps(mask_prefix) && !mask_prefix.covers(prefix_to_split)); + let mut split_prefixes = prefix_to_split.subtract(mask_prefix); + split_prefixes.push( + prefix_to_split + .intersection(mask_prefix) + // Intersection non-empty given that prefixes overlap + .unwrap_or_else(|| unreachable!()), + ); + split_prefixes +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::VpcdLookupResult; + use config::external::overlay::vpc::{Vpc, VpcTable}; + use config::external::overlay::vpcpeering::{VpcExpose, VpcManifest, VpcPeeringTable}; + use lpm::prefix::{PortRange, Prefix, PrefixWithPortsSize}; + use net::vxlan::Vni; + use std::collections::BTreeSet; + use std::ops::Bound; + + #[test] + fn test_split_overlapping_basic() { + // Test splitting 10.0.0.0/16 with mask 10.0.1.0/24 + let prefix_to_split = PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/16"), None); + let mask_prefix = PrefixWithOptionalPorts::new(Prefix::from("10.0.1.0/24"), None); + + let result: BTreeSet<_> = split_overlapping(&prefix_to_split, &mask_prefix) + .into_iter() + .collect(); + + // Should produce the intersection (10.0.1.0/24) and the remainder parts + assert!(!result.is_empty()); + + // Verify that one of the results is the intersection + assert!(result.contains(&mask_prefix)); + + // Verify all results together are the same size as the 
original prefix + let total_ips = result + .iter() + .fold(PrefixWithPortsSize::from(0u8), |sum, prefix| { + sum + prefix.size() + }); + let original_ips = prefix_to_split.size(); + assert_eq!(total_ips, original_ips); + + // Verify all results are within the original prefix + for prefix in &result { + assert!(prefix_to_split.covers(prefix)); + } + + // Verify results do not overlap + for i in &result.clone() { + for j in result.range((Bound::Excluded(i), Bound::Unbounded)) { + assert!(!i.overlaps(j)); + } + } + + // Just to be on the safe side for this test, check the list manually + let expected = BTreeSet::from([ + PrefixWithOptionalPorts::new(Prefix::from("10.0.128.0/17"), None), + PrefixWithOptionalPorts::new(Prefix::from("10.0.64.0/18"), None), + PrefixWithOptionalPorts::new(Prefix::from("10.0.32.0/19"), None), + PrefixWithOptionalPorts::new(Prefix::from("10.0.16.0/20"), None), + PrefixWithOptionalPorts::new(Prefix::from("10.0.8.0/21"), None), + PrefixWithOptionalPorts::new(Prefix::from("10.0.4.0/22"), None), + PrefixWithOptionalPorts::new(Prefix::from("10.0.3.0/23"), None), + PrefixWithOptionalPorts::new(Prefix::from("10.0.1.0/24"), None), + PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/24"), None), + ]); + assert_eq!(result, expected); + } + + #[test] + fn test_split_overlapping_with_ports() { + // Test splitting with port ranges + let port_range1 = PortRange::new(80, 443).unwrap(); + let port_range2 = PortRange::new(100, 200).unwrap(); + + let prefix_to_split = + PrefixWithOptionalPorts::new(Prefix::from("192.168.0.0/16"), Some(port_range1)); + let mask_prefix = + PrefixWithOptionalPorts::new(Prefix::from("192.168.1.0/24"), Some(port_range2)); + + let result: BTreeSet<_> = split_overlapping(&prefix_to_split, &mask_prefix) + .into_iter() + .collect(); + + // Should produce multiple prefixes including the intersection + assert!(!result.is_empty()); + + // The intersection should have the intersection of both IP prefix and port range + let intersection = prefix_to_split.intersection(&mask_prefix).unwrap(); + assert!(result.contains(&intersection)); + + // Check the list manually + let expected = BTreeSet::from([ + PrefixWithOptionalPorts::new( + Prefix::from("192.168.0.0/16"), + Some(PortRange::new(80, 99).unwrap()), + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.0.0/16"), + Some(PortRange::new(201, 443).unwrap()), + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.128.0/17"), + Some(PortRange::new(100, 200).unwrap()), + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.64.0/18"), + Some(PortRange::new(100, 200).unwrap()), + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.32.0/19"), + Some(PortRange::new(100, 200).unwrap()), + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.16.0/20"), + Some(PortRange::new(100, 200).unwrap()), + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.8.0/21"), + Some(PortRange::new(100, 200).unwrap()), + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.4.0/22"), + Some(PortRange::new(100, 200).unwrap()), + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.3.0/23"), + Some(PortRange::new(100, 200).unwrap()), + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.1.0/24"), + Some(PortRange::new(100, 200).unwrap()), // Corresponds to the mask + ), + PrefixWithOptionalPorts::new( + Prefix::from("192.168.0.0/24"), + Some(PortRange::new(100, 200).unwrap()), + ), + ]); + assert_eq!(result, expected, "{result:#?},\n {expected:#?}"); + } + + #[test] + fn 
test_get_manifest_ips_overlap_no_overlap() { + let vpcd1 = VpcDiscriminant::VNI(Vni::new_checked(100).unwrap()); + let vpcd2 = VpcDiscriminant::VNI(Vni::new_checked(200).unwrap()); + + let manifest1 = VpcManifest { + name: "manifest1".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/24".into())], + }; + + let manifest2 = VpcManifest { + name: "manifest2".to_string(), + exposes: vec![VpcExpose::empty().ip("20.0.0.0/24".into())], + }; + + let overlap = + get_manifest_ips_overlap(&manifest1, &vpcd1, &manifest2, &vpcd2, |expose| &expose.ips); + + // No overlap between 10.0.0.0/24 and 20.0.0.0/24 + assert!(overlap.is_empty()); + } + + #[test] + fn test_get_manifest_ips_overlap_with_overlap() { + let vpcd1 = VpcDiscriminant::VNI(Vni::new_checked(100).unwrap()); + let vpcd2 = VpcDiscriminant::VNI(Vni::new_checked(200).unwrap()); + + let manifest1 = VpcManifest { + name: "manifest1".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/24".into())], + }; + + let manifest2 = VpcManifest { + name: "manifest2".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/25".into())], + }; + + let overlap = + get_manifest_ips_overlap(&manifest1, &vpcd1, &manifest2, &vpcd2, |expose| &expose.ips); + + // Should have one overlap: 10.0.0.0/25 (intersection of /24 and /25) + assert_eq!(overlap.len(), 1); + let expected_prefix = PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/25"), None); + assert!(overlap.contains_key(&expected_prefix)); + + let vpcds = overlap.get(&expected_prefix).unwrap(); + assert_eq!(vpcds.len(), 2); + assert!(vpcds.contains(&vpcd1)); + assert!(vpcds.contains(&vpcd2)); + } + + #[test] + fn test_get_manifest_ips_overlap_multiple_prefixes() { + let vpcd1 = VpcDiscriminant::VNI(Vni::new_checked(100).unwrap()); + let vpcd2 = VpcDiscriminant::VNI(Vni::new_checked(200).unwrap()); + + let manifest1 = VpcManifest { + name: "manifest1".to_string(), + exposes: vec![ + VpcExpose::empty() + .ip("10.0.0.0/24".into()) + .ip("20.0.0.128/25".into()), + ], + }; + + let manifest2 = VpcManifest { + name: "manifest2".to_string(), + exposes: vec![ + VpcExpose::empty().ip("10.0.0.0/25".into()), + VpcExpose::empty().ip("20.0.0.0/24".into()), + ], + }; + + let overlap = + get_manifest_ips_overlap(&manifest1, &vpcd1, &manifest2, &vpcd2, |expose| &expose.ips); + + // Should have two overlaps: 10.0.0.0/25 and 20.0.0.128/25 + assert_eq!(overlap.len(), 2); + + let prefix1 = PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/25"), None); + let prefix2 = PrefixWithOptionalPorts::new(Prefix::from("20.0.0.128/25"), None); + + assert!(overlap.contains_key(&prefix1)); + assert!(overlap.contains_key(&prefix2)); + } + + #[test] + fn test_consolidate_overlap_list_no_merge() { + let vpcd1 = VpcDiscriminant::VNI(Vni::new_checked(100).unwrap()); + let vpcd2 = VpcDiscriminant::VNI(Vni::new_checked(200).unwrap()); + + let mut overlap = BTreeMap::new(); + overlap.insert( + PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/25"), None), + HashSet::from([vpcd1]), + ); + overlap.insert( + PrefixWithOptionalPorts::new(Prefix::from("20.0.0.0/25"), None), + HashSet::from([vpcd2]), + ); + + let result = consolidate_overlap_list(overlap); + + // Should have two separate prefixes (no merging possible) + assert_eq!(result.len(), 2); + } + + #[test] + fn test_consolidate_overlap_list_with_merge() { + let vpcd1 = VpcDiscriminant::VNI(Vni::new_checked(100).unwrap()); + let vpcd2 = VpcDiscriminant::VNI(Vni::new_checked(200).unwrap()); + + let mut overlap = BTreeMap::new(); + // These two adjacent /25 prefixes can 
merge into a /24 + overlap.insert( + PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/25"), None), + HashSet::from([vpcd1, vpcd2]), + ); + overlap.insert( + PrefixWithOptionalPorts::new(Prefix::from("10.0.0.128/25"), None), + HashSet::from([vpcd1, vpcd2]), + ); + + let result = consolidate_overlap_list(overlap); + + // Should merge into a single /24 + assert_eq!(result.len(), 1); + let expected_prefix = PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/24"), None); + assert!(result.contains_key(&expected_prefix)); + } + + #[test] + fn test_get_split_prefixes_for_manifest_no_overlap() { + let vpcd = VpcDiscriminant::VNI(Vni::new_checked(100).unwrap()); + + let manifest = VpcManifest { + name: "manifest".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/24".into())], + }; + + let overlaps = BTreeMap::new(); + + let result = + get_split_prefixes_for_manifest(&manifest, &vpcd, |expose| &expose.ips, overlaps); + + // With no overlaps, should return the original prefix with Single result + assert_eq!(result.len(), 1); + assert_eq!( + result[0].0, + PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/24"), None) + ); + assert_eq!(result[0].1, VpcdLookupResult::Single(vpcd)); + } + + #[test] + fn test_get_split_prefixes_for_manifest_with_overlap() { + let vpcd1 = VpcDiscriminant::VNI(Vni::new_checked(100).unwrap()); + let vpcd2 = VpcDiscriminant::VNI(Vni::new_checked(200).unwrap()); + + let manifest = VpcManifest { + name: "manifest".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/24".into())], + }; + + let mut overlaps = BTreeMap::new(); + // The overlap covers part of the manifest's prefix + overlaps.insert( + PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/25"), None), + HashSet::from([vpcd1, vpcd2]), + ); + + let mut result = + get_split_prefixes_for_manifest(&manifest, &vpcd1, |expose| &expose.ips, overlaps); + result.sort_by_key(|(prefix, _)| *prefix); + + // Should split into multiple prefixes + assert_eq!(result.len(), 2); + assert_eq!( + result[0].0, + PrefixWithOptionalPorts::new(Prefix::from("10.0.0.0/25"), None) + ); + assert_eq!(result[0].1, VpcdLookupResult::MultipleMatches); + assert_eq!( + result[1].0, + PrefixWithOptionalPorts::new(Prefix::from("10.0.0.128/25"), None) + ); + assert_eq!(result[1].1, VpcdLookupResult::Single(vpcd1)); + } + + #[test] + fn test_process_peering_no_overlap() { + let mut vpc_table = VpcTable::new(); + + let vni1 = Vni::new_checked(100).unwrap(); + let vni2 = Vni::new_checked(200).unwrap(); + + let mut vpc1 = Vpc::new("vpc1", "VPC01", vni1.as_u32()).unwrap(); + let vpc2 = Vpc::new("vpc2", "VPC02", vni2.as_u32()).unwrap(); + + vpc1.peerings.push(Peering { + name: "vpc1-to-vpc2".to_string(), + local: VpcManifest { + name: "vpc1-local".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/24".into())], + }, + remote: VpcManifest { + name: "vpc2-remote".to_string(), + exposes: vec![VpcExpose::empty().ip("20.0.0.0/24".into())], + }, + remote_id: "VPC02".try_into().unwrap(), + gwgroup: None, + adv_communities: vec![], + }); + + vpc_table.add(vpc1.clone()).unwrap(); + vpc_table.add(vpc2).unwrap(); + + let overlay = Overlay { + vpc_table, + peering_table: VpcPeeringTable::new(), + }; + + let mut table = FlowFilterTable::new(); + table + .process_peering(&overlay, &vpc1, &vpc1.peerings[0]) + .unwrap(); + + let src_vpcd = VpcDiscriminant::VNI(vni1); + let src_addr = "10.0.0.5".parse().unwrap(); + let dst_addr = "20.0.0.5".parse().unwrap(); + + let dst_vpcd = table.lookup(src_vpcd, &src_addr, &dst_addr, None); + assert_eq!( + 
dst_vpcd, + Some(VpcdLookupResult::Single(VpcDiscriminant::VNI(vni2))) + ); + } + + #[test] + fn test_process_peering_with_overlap() { + let mut vpc_table = VpcTable::new(); + + let vni1 = Vni::new_checked(100).unwrap(); + let vni2 = Vni::new_checked(200).unwrap(); + let vni3 = Vni::new_checked(300).unwrap(); + + let mut vpc1 = Vpc::new("vpc1", "VPC01", vni1.as_u32()).unwrap(); + let vpc2 = Vpc::new("vpc2", "VPC02", vni2.as_u32()).unwrap(); + let vpc3 = Vpc::new("vpc3", "VPC03", vni3.as_u32()).unwrap(); + + // Add two peerings with overlapping remote prefixes + vpc1.peerings.push(Peering { + name: "vpc1-to-vpc2".to_string(), + local: VpcManifest { + name: "vpc1-local".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/24".into())], + }, + remote: VpcManifest { + name: "vpc2-remote".to_string(), + exposes: vec![VpcExpose::empty().ip("20.0.0.0/24".into())], + }, + remote_id: "VPC02".try_into().unwrap(), + gwgroup: None, + adv_communities: vec![], + }); + + vpc1.peerings.push(Peering { + name: "vpc1-to-vpc3".to_string(), + local: VpcManifest { + name: "vpc1-local2".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/24".into())], + }, + remote: VpcManifest { + name: "vpc3-remote".to_string(), + exposes: vec![VpcExpose::empty().ip("20.0.0.0/25".into())], + }, + remote_id: "VPC03".try_into().unwrap(), + gwgroup: None, + adv_communities: vec![], + }); + + vpc_table.add(vpc1.clone()).unwrap(); + vpc_table.add(vpc2).unwrap(); + vpc_table.add(vpc3).unwrap(); + + let overlay = Overlay { + vpc_table, + peering_table: VpcPeeringTable::new(), + }; + + let mut table = FlowFilterTable::new(); + table + .process_peering(&overlay, &vpc1, &vpc1.peerings[0]) + .unwrap(); + + let src_vpcd = VpcDiscriminant::VNI(vni1); + let src_addr = "10.0.0.5".parse().unwrap(); + let dst_addr = "20.0.0.5".parse().unwrap(); // In overlapping segment + + let dst_vpcd = table.lookup(src_vpcd, &src_addr, &dst_addr, None); + assert_eq!(dst_vpcd, Some(VpcdLookupResult::MultipleMatches)); + + let src_vpcd = VpcDiscriminant::VNI(vni1); + let src_addr = "10.0.0.5".parse().unwrap(); + let dst_addr = "20.0.0.129".parse().unwrap(); // Not in overlapping segment + + let dst_vpcd = table.lookup(src_vpcd, &src_addr, &dst_addr, None); + assert_eq!( + dst_vpcd, + Some(VpcdLookupResult::Single(VpcDiscriminant::VNI(vni2))) + ); + } + + #[test] + fn test_clone_skipping_peerings() { + let mut vpc = Vpc::new("test-vpc", "VPC01", 100).unwrap(); + + vpc.peerings.push(Peering { + name: "peering1".to_string(), + local: VpcManifest { + name: "local1".to_string(), + exposes: vec![], + }, + remote: VpcManifest { + name: "remote1".to_string(), + exposes: vec![], + }, + remote_id: "VPC02".try_into().unwrap(), + gwgroup: None, + adv_communities: vec![], + }); + + let cloned = clone_skipping_peerings(&vpc); + + assert_eq!(cloned.name, vpc.name); + assert_eq!(cloned.id, vpc.id); + assert_eq!(cloned.vni, vpc.vni); + assert_eq!(cloned.peerings.len(), 0); + } + + #[test] + fn test_cleanup_vpc_table() { + let mut vpc = Vpc::new("test-vpc", "VPC01", 100).unwrap(); + + // Add a peering with some exposes + vpc.peerings.push(Peering { + name: "peering1".to_string(), + local: VpcManifest { + name: "local1".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/24".into())], + }, + remote: VpcManifest { + name: "remote1".to_string(), + exposes: vec![VpcExpose::empty().ip("20.0.0.0/24".into())], + }, + remote_id: "VPC02".try_into().unwrap(), + gwgroup: None, + adv_communities: vec![], + }); + + let vpcs = vec![&vpc]; + let result = 
cleanup_vpc_table(vpcs); + + assert!(result.is_ok()); + let cleaned_vpcs = result.unwrap(); + assert_eq!(cleaned_vpcs.len(), 1); + assert_eq!(cleaned_vpcs[0].name, vpc.name); + } + + #[test] + fn test_build_from_overlay() { + // Create a simple overlay with two VPCs and a peering + let mut vpc_table = VpcTable::new(); + + let vni1 = Vni::new_checked(100).unwrap(); + let vni2 = Vni::new_checked(200).unwrap(); + + let mut vpc1 = Vpc::new("vpc1", "VPC01", vni1.as_u32()).unwrap(); + let vpc2 = Vpc::new("vpc2", "VPC02", vni2.as_u32()).unwrap(); + + // Add peering from vpc1 to vpc2 + vpc1.peerings.push(Peering { + name: "vpc1-to-vpc2".to_string(), + local: VpcManifest { + name: "vpc1-local".to_string(), + exposes: vec![VpcExpose::empty().ip("10.0.0.0/24".into())], + }, + remote: VpcManifest { + name: "vpc2-remote".to_string(), + exposes: vec![VpcExpose::empty().ip("20.0.0.0/24".into())], + }, + remote_id: "VPC02".try_into().unwrap(), + gwgroup: None, + adv_communities: vec![], + }); + + vpc_table.add(vpc1).unwrap(); + vpc_table.add(vpc2).unwrap(); + + let overlay = Overlay { + vpc_table, + peering_table: VpcPeeringTable::new(), + }; + + let result = FlowFilterTable::build_from_overlay(&overlay); + assert!(result.is_ok()); + + let table = result.unwrap(); + // Should be able to look up flows + let src_vpcd = VpcDiscriminant::VNI(vni1); + let src_addr = "10.0.0.5".parse().unwrap(); + let dst_addr = "20.0.0.5".parse().unwrap(); + + let dst_vpcd = table.lookup(src_vpcd, &src_addr, &dst_addr, None); + assert_eq!( + dst_vpcd, + Some(VpcdLookupResult::Single(VpcDiscriminant::VNI(vni2))) + ); + } +} diff --git a/flow-filter/src/tables.rs b/flow-filter/src/tables.rs new file mode 100644 index 000000000..fc9f2db3f --- /dev/null +++ b/flow-filter/src/tables.rs @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! A module implementing a structure to back the flow filter lookups. + +use config::ConfigError; +use lpm::prefix::range_map::DisjointRangesBTreeMap; +use lpm::prefix::{PortRange, Prefix}; +use lpm::trie::{IpPortPrefixTrie, ValueWithAssociatedRanges}; +use net::packet::VpcDiscriminant; +use std::collections::HashMap; +use std::fmt::Debug; +use std::net::IpAddr; +use std::ops::RangeBounds; + +/// The result of a VPC discriminant lookup in the flow filter table. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum VpcdLookupResult { + /// A single VPC discriminant was found. + Single(VpcDiscriminant), + /// Multiple VPC discriminants were found, we cannot tell which is the right one for this + /// packet. + MultipleMatches, +} + +/// A structure to store information about allowed flows between VPCs. +/// It contains one table per source VPC discriminant. 
+// +// The structure looks like this: +// +// FlowFilterTable +// -> HashMap +// (one table per source VPC discriminant) +// +// VpcConnectionsTable +// -> IpPortPrefixTrie +// Key: source IP prefix +// Value: SrcConnectionData +// +// SrcConnectionData (enum) +// -> AllPorts(DstConnectionData): applies to all source ports +// -> Ranges(DisjointRangesBTreeMap): +// associates one or more source port ranges, for the IpPortPrefixTrie lookup, to +// destination connection data +// +// DstConnectionData +// -> IpPortPrefixTrie +// LPM trie containing destination IP prefixes and associated port/VPC information +// +// RemotePrefixPortData (enum) +// -> AllPorts(VpcdLookupResult): destination VPC for all ports (no port range specified) +// -> Ranges(DisjointRangesBTreeMap): +// associates destination port ranges to destination VPC discriminants +// +// How this works: +// +// 1. From the FlowFilterTable, find the VpcConnectionsTable for the packet's source VPC +// +// 2. Based on source IP and port, look up the SrcConnectionData in the VpcConnectionsTable +// (LPM trie). This retrieves the destination connection information for the given +// source VPC, source IP, and all associated port ranges. +// +// 3. From the SrcConnectionData, extract the DstConnectionData that matches the source port +// (if port ranges are specified). +// +// 4. Using the destination IP and port, look up in the DstConnectionData's trie to find the +// RemotePrefixPortData that matches the destination IP prefix. +// +// 5. From the RemotePrefixPortData, extract the VpcdLookupResult that matches the destination +// port (if port ranges are specified). +// +// 6. If we found a match, then the connection is valid; we return the VpcdLookupResult which +// contains either a single destination VPC discriminant or indicates multiple matches. +#[derive(Debug, Clone)] +pub struct FlowFilterTable(HashMap); + +impl FlowFilterTable { + #[allow(clippy::new_without_default)] + pub(crate) fn new() -> Self { + Self(HashMap::new()) + } + + fn insert_table(&mut self, src_vpcd: VpcDiscriminant, table: VpcConnectionsTable) { + self.0.insert(src_vpcd, table); + } + + fn get_table(&self, src_vpcd: VpcDiscriminant) -> Option<&VpcConnectionsTable> { + self.0.get(&src_vpcd) + } + + fn get_table_mut(&mut self, src_vpcd: VpcDiscriminant) -> Option<&mut VpcConnectionsTable> { + self.0.get_mut(&src_vpcd) + } + + pub(crate) fn lookup( + &self, + src_vpcd: VpcDiscriminant, + src_addr: &IpAddr, + dst_addr: &IpAddr, + ports: Option<(u16, u16)>, + ) -> Option { + // Get the table related to the source VPC for the packet + let table = self.get_table(src_vpcd)?; + + let (src_port, dst_port) = ports.unzip(); + // Look for valid connections information in the table that matches the source address and port + let (_, src_connection_data) = table.lookup(src_addr, src_port)?; + + // We have a src_connection_data object for our source VPC and source IP, and source port + // ranges associated to this IP: we may need to find the right item for this entry based on + // the source port + let dst_connection_data = src_connection_data.get_remote_prefixes_data(src_port)?; + + // We have a dst_connection_data object for our source VPC, IP, port. From this object, we + // need to retrieve the prefix information associated to our destination IP and port. 
+ let remote_prefix_data = dst_connection_data.lookup(dst_addr, dst_port)?; + + // We have a remote_prefix_data object for our destination address, and the port ranges + // associated to this IP: we may need to find the right item for this entry based on the + // destination port + remote_prefix_data.get_vpcd(dst_port).cloned() + } + + pub(crate) fn insert( + &mut self, + src_vpcd: VpcDiscriminant, + dst_vpcd: VpcdLookupResult, + src_prefix: Prefix, + src_port_range: OptionalPortRange, + dst_prefix: Prefix, + dst_port_range: OptionalPortRange, + ) -> Result<(), ConfigError> { + if let Some(table) = self.get_table_mut(src_vpcd) { + table.insert( + dst_vpcd, + src_prefix, + src_port_range, + dst_prefix, + dst_port_range, + )?; + } else { + let mut table = VpcConnectionsTable::new(); + table.insert( + dst_vpcd, + src_prefix, + src_port_range, + dst_prefix, + dst_port_range, + )?; + self.insert_table(src_vpcd, table); + } + Ok(()) + } +} + +#[derive(Debug, Clone)] +struct VpcConnectionsTable(IpPortPrefixTrie); + +impl VpcConnectionsTable { + fn new() -> Self { + Self(IpPortPrefixTrie::new()) + } + + fn lookup(&self, addr: &IpAddr, port: Option) -> Option<(Prefix, &SrcConnectionData)> { + self.0.lookup(addr, port) + } + + fn insert( + &mut self, + dst_vpcd: VpcdLookupResult, + src_prefix: Prefix, + src_port_range: OptionalPortRange, + dst_prefix: Prefix, + dst_port_range: OptionalPortRange, + ) -> Result<(), ConfigError> { + if let Some(value) = self.0.get_mut(src_prefix) { + value.update(src_port_range, dst_vpcd, dst_prefix, dst_port_range)?; + } else { + let value = + SrcConnectionData::new(src_port_range, dst_vpcd, dst_prefix, dst_port_range); + self.0.insert(src_prefix, value); + } + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub(crate) enum SrcConnectionData { + // No port range associated with the IP prefix, the value applies to all ports. + AllPorts(DstConnectionData), + // One or several port ranges associated to the IP prefix used as the key for the table entry. 
+ Ranges(DisjointRangesBTreeMap), +} + +impl SrcConnectionData { + fn new( + src_port_range: OptionalPortRange, + dst_vpcd: VpcdLookupResult, + dst_prefix: Prefix, + dst_port_range: OptionalPortRange, + ) -> Self { + let connection_data = DstConnectionData::new(dst_vpcd, dst_prefix, dst_port_range); + match src_port_range { + OptionalPortRange::NoPortRangeMeansAllPorts => { + SrcConnectionData::AllPorts(connection_data) + } + OptionalPortRange::Some(port_range) => { + let map = DisjointRangesBTreeMap::from_iter([(port_range, connection_data)]); + SrcConnectionData::Ranges(map) + } + } + } + fn get_remote_prefixes_data(&self, src_port: Option) -> Option<&DstConnectionData> { + match self { + SrcConnectionData::AllPorts(remote_prefixes_data) => Some(remote_prefixes_data), + SrcConnectionData::Ranges(ranges) => { + // If we don't have a source port, we can't hope to find a matching port range + let src_port = src_port?; + // connection_data contains data for the various port ranges associated to the + // prefix retrieved from table.lookup(), find the remote prefixes data related to + // the right port range for our source port + ranges + .lookup(&src_port) + .map(|(_, remote_prefixes_data)| remote_prefixes_data) + } + } + } + + fn update( + &mut self, + src_port_range: OptionalPortRange, + dst_vpcd: VpcdLookupResult, + dst_prefix: Prefix, + dst_port_range: OptionalPortRange, + ) -> Result<(), ConfigError> { + let remote_prefixes_data = match self { + SrcConnectionData::AllPorts(remote_prefixes_data) => remote_prefixes_data, + SrcConnectionData::Ranges(map) => { + let OptionalPortRange::Some(src_port_range) = src_port_range else { + // We're trying to add a port range that covers all existing ports: this means + // we've got some overlap + return Err(ConfigError::InternalFailure( + "Trying to update (local) port ranges map with overlapping ranges" + .to_string(), + )); + }; + map.get_mut(&src_port_range) + // We found an entry for this port range, we should have the port range in the map + .ok_or(ConfigError::InternalFailure( + "Cannot find entry to update in port ranges map".to_string(), + ))? 
+ } + }; + remote_prefixes_data.update(dst_vpcd, dst_prefix, dst_port_range) + } +} + +impl ValueWithAssociatedRanges for SrcConnectionData { + fn covers_all_ports(&self) -> bool { + match self { + SrcConnectionData::AllPorts(_) => true, + SrcConnectionData::Ranges(connection_data) => { + connection_data + .keys() + .fold(0, |sum, range| sum + range.len()) + == PortRange::MAX_LENGTH + } + } + } + + fn covers_port(&self, port: u16) -> bool { + match self { + SrcConnectionData::AllPorts(_) => true, + SrcConnectionData::Ranges(ranges) => { + ranges.iter().any(|(range, _)| range.contains(&port)) + } + } + } +} + +#[derive(Debug, Clone)] +pub(crate) enum RemotePrefixPortData { + AllPorts(VpcdLookupResult), + Ranges(DisjointRangesBTreeMap), +} + +impl RemotePrefixPortData { + fn new(port_range: OptionalPortRange, vpcd: VpcdLookupResult) -> Self { + match port_range { + OptionalPortRange::NoPortRangeMeansAllPorts => RemotePrefixPortData::AllPorts(vpcd), + OptionalPortRange::Some(range) => { + RemotePrefixPortData::Ranges(DisjointRangesBTreeMap::from_iter([(range, vpcd)])) + } + } + } + + fn get_vpcd(&self, dst_port: Option) -> Option<&VpcdLookupResult> { + match self { + RemotePrefixPortData::AllPorts(vpcd) => Some(vpcd), + RemotePrefixPortData::Ranges(ranges) => { + // If we don't have a destination port, we can't hope to find a matching port range + let dst_port = dst_port?; + ranges.lookup(&dst_port).map(|(_, vpcd)| vpcd) + } + } + } +} + +impl ValueWithAssociatedRanges for RemotePrefixPortData { + fn covers_all_ports(&self) -> bool { + match self { + RemotePrefixPortData::AllPorts(_) => true, + RemotePrefixPortData::Ranges(ranges) => { + ranges.iter().fold(0, |sum, (range, _)| sum + range.len()) == PortRange::MAX_LENGTH + } + } + } + + fn covers_port(&self, port: u16) -> bool { + match self { + RemotePrefixPortData::AllPorts(_) => true, + RemotePrefixPortData::Ranges(ranges) => { + ranges.iter().any(|(range, _)| range.contains(&port)) + } + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct DstConnectionData(IpPortPrefixTrie); + +impl DstConnectionData { + fn new(vpcd: VpcdLookupResult, prefix: Prefix, port_range: OptionalPortRange) -> Self { + let remote_data = match port_range { + OptionalPortRange::NoPortRangeMeansAllPorts => RemotePrefixPortData::AllPorts(vpcd), + OptionalPortRange::Some(range) => { + RemotePrefixPortData::Ranges(DisjointRangesBTreeMap::from_iter([(range, vpcd)])) + } + }; + DstConnectionData(IpPortPrefixTrie::from(prefix, remote_data)) + } + + fn lookup(&self, addr: &IpAddr, port: Option) -> Option<&RemotePrefixPortData> { + self.0.lookup(addr, port).map(|(_, data)| data) + } + + fn update( + &mut self, + vpcd: VpcdLookupResult, + prefix: Prefix, + port_range: OptionalPortRange, + ) -> Result<(), ConfigError> { + match (self.0.get_mut(prefix), port_range) { + ( + Some(RemotePrefixPortData::Ranges(existing_range_map)), + OptionalPortRange::Some(range), + ) => { + existing_range_map.insert(range, vpcd); + } + ( + Some(RemotePrefixPortData::AllPorts(existing_vpcd)), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) => { + // We should only hit this case if we already inserted a similar entry + if *existing_vpcd != VpcdLookupResult::MultipleMatches + && vpcd != VpcdLookupResult::MultipleMatches + { + return Err(ConfigError::InternalFailure( + "Trying to insert conflicting values for remote port range".to_string(), + )); + } else { + // That's OK + } + } + (Some(_), _) => { + // At least one of the entries, the existing or the new, covers all ports, so we + // can't add 
a new one or we'll have overlap + return Err(ConfigError::InternalFailure( + "Trying to update (remote) port ranges map with overlapping ranges".to_string(), + )); + } + (None, range) => { + let prefix_data = RemotePrefixPortData::new(range, vpcd); + self.0.insert(prefix, prefix_data); + } + } + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum OptionalPortRange { + NoPortRangeMeansAllPorts, + Some(PortRange), +} + +impl From> for OptionalPortRange { + fn from(opt: Option) -> Self { + match opt { + Some(range) => OptionalPortRange::Some(range), + None => OptionalPortRange::NoPortRangeMeansAllPorts, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use lpm::prefix::Prefix; + use net::vxlan::Vni; + + fn vpcd(vni: u32) -> VpcDiscriminant { + VpcDiscriminant::VNI(Vni::new_checked(vni).unwrap()) + } + + #[test] + fn test_flow_filter_table_new() { + let table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let src_addr = "10.0.0.1".parse().unwrap(); + let dst_addr = "20.0.0.1".parse().unwrap(); + + let vpcd_result = table.lookup(src_vpcd, &src_addr, &dst_addr, None); + assert!(vpcd_result.is_none()); + } + + #[test] + fn test_flow_filter_table_insert_and_contains_simple() { + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd = VpcdLookupResult::Single(vpcd(200)); + + let src_prefix = Prefix::from("10.0.0.0/24"); + let dst_prefix = Prefix::from("20.0.0.0/24"); + + table + .insert( + src_vpcd, + dst_vpcd.clone(), + src_prefix, + OptionalPortRange::NoPortRangeMeansAllPorts, + dst_prefix, + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + // Should allow traffic from src to dst + let src_addr = "10.0.0.5".parse().unwrap(); + let dst_addr = "20.0.0.10".parse().unwrap(); + let vpcd_result = table.lookup(src_vpcd, &src_addr, &dst_addr, None); + assert_eq!(vpcd_result, Some(dst_vpcd)); + + // Should not allow traffic from different src + let wrong_src_addr = "10.1.0.5".parse().unwrap(); + let vpcd_result = table.lookup(src_vpcd, &wrong_src_addr, &dst_addr, None); + assert!(vpcd_result.is_none()); + + // Should not allow traffic to different dst + let wrong_dst_addr = "30.0.0.10".parse().unwrap(); + let vpcd_result = table.lookup(src_vpcd, &src_addr, &wrong_dst_addr, None); + assert!(vpcd_result.is_none()); + } + + #[test] + fn test_flow_filter_table_with_port_ranges() { + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd = VpcdLookupResult::Single(vpcd(200)); + + let src_prefix = Prefix::from("10.0.0.0/24"); + let dst_prefix = Prefix::from("20.0.0.0/24"); + let src_port_range = OptionalPortRange::Some(PortRange::new(1024, 2048).unwrap()); + let dst_port_range = OptionalPortRange::Some(PortRange::new(80, 80).unwrap()); + + table + .insert( + src_vpcd, + dst_vpcd.clone(), + src_prefix, + src_port_range, + dst_prefix, + dst_port_range, + ) + .unwrap(); + + let src_addr = "10.0.0.5".parse().unwrap(); + let dst_addr = "20.0.0.10".parse().unwrap(); + + // Should allow with matching ports + let vpcd_result = table.lookup(src_vpcd, &src_addr, &dst_addr, Some((1500, 80))); + assert_eq!(vpcd_result, Some(dst_vpcd)); + + // Should not allow with non-matching src port + let vpcd_result = table.lookup(src_vpcd, &src_addr, &dst_addr, Some((500, 80))); + assert!(vpcd_result.is_none()); + + // Should not allow with non-matching dst port + let vpcd_result = table.lookup(src_vpcd, &src_addr, &dst_addr, Some((1500, 443))); + assert!(vpcd_result.is_none()); + + // Should not allow without ports + let 
vpcd_result = table.lookup(src_vpcd, &src_addr, &dst_addr, None); + assert!(vpcd_result.is_none()); + } + + #[test] + fn test_flow_filter_table_multiple_entries() { + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd1 = VpcdLookupResult::Single(vpcd(200)); + let dst_vpcd2 = VpcdLookupResult::Single(vpcd(300)); + + // Add two entries for different destination prefixes + table + .insert( + src_vpcd, + dst_vpcd1.clone(), + Prefix::from("10.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("20.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + table + .insert( + src_vpcd, + dst_vpcd2.clone(), + Prefix::from("10.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("30.0.0.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + let src_addr = "10.0.0.5".parse().unwrap(); + + // Should route to dst_vpcd1 + let vpcd_result = table.lookup(src_vpcd, &src_addr, &"20.0.0.10".parse().unwrap(), None); + assert_eq!(vpcd_result, Some(dst_vpcd1)); + + // Should route to dst_vpcd2 + let vpcd_result = table.lookup(src_vpcd, &src_addr, &"30.0.0.10".parse().unwrap(), None); + assert_eq!(vpcd_result, Some(dst_vpcd2)); + } + + #[test] + fn test_vpc_connections_table_lookup() { + let mut table = VpcConnectionsTable::new(); + let dst_vpcd = VpcdLookupResult::Single(vpcd(200)); + + let src_prefix = Prefix::from("10.0.0.0/24"); + let dst_prefix = Prefix::from("20.0.0.0/24"); + + table + .insert( + dst_vpcd, + src_prefix, + OptionalPortRange::NoPortRangeMeansAllPorts, + dst_prefix, + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + // Lookup should succeed + let result = table.lookup(&"10.0.0.5".parse().unwrap(), None); + assert!(result.is_some()); + let (prefix, _) = result.unwrap(); + assert_eq!(prefix, src_prefix); + + // Lookup for non-matching address should fail + let result = table.lookup(&"11.0.0.5".parse().unwrap(), None); + assert!(result.is_none()); + } + + #[test] + fn test_vpc_connections_table_with_ports() { + let mut table = VpcConnectionsTable::new(); + let dst_vpcd = VpcdLookupResult::Single(vpcd(200)); + + let src_prefix = Prefix::from("10.0.0.0/24"); + let dst_prefix = Prefix::from("20.0.0.0/24"); + let src_port_range = OptionalPortRange::Some(PortRange::new(8080, 8090).unwrap()); + let dst_port_range = OptionalPortRange::NoPortRangeMeansAllPorts; + + table + .insert( + dst_vpcd, + src_prefix, + src_port_range, + dst_prefix, + dst_port_range, + ) + .unwrap(); + + // Lookup with matching port + let result = table.lookup(&"10.0.0.5".parse().unwrap(), Some(8085)); + assert!(result.is_some()); + + // Lookup with non-matching port + let result = table.lookup(&"10.0.0.5".parse().unwrap(), Some(9000)); + assert!(result.is_none()); + } + + #[test] + fn test_optional_port_range_from() { + let from_some = OptionalPortRange::from(Some(PortRange::new(80, 80).unwrap())); + assert!(matches!(from_some, OptionalPortRange::Some(_))); + + let from_none = OptionalPortRange::from(None); + assert!(matches!( + from_none, + OptionalPortRange::NoPortRangeMeansAllPorts + )); + } + + #[test] + fn test_flow_filter_table_ipv6() { + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd = VpcdLookupResult::Single(vpcd(200)); + + let src_prefix = Prefix::from("2001:db8::/32"); + let dst_prefix = Prefix::from("2001:db9::/32"); + + table + .insert( + src_vpcd, + dst_vpcd.clone(), + src_prefix, + OptionalPortRange::NoPortRangeMeansAllPorts, + dst_prefix, + 
OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + let src_addr = "2001:db8::1".parse().unwrap(); + let dst_addr = "2001:db9::1".parse().unwrap(); + let vpcd_result = table.lookup(src_vpcd, &src_addr, &dst_addr, None); + assert_eq!(vpcd_result, Some(dst_vpcd)); + } + + #[test] + fn test_flow_filter_table_longest_prefix_match() { + let mut table = FlowFilterTable::new(); + let src_vpcd = vpcd(100); + let dst_vpcd1 = VpcdLookupResult::Single(vpcd(200)); + let dst_vpcd2 = VpcdLookupResult::Single(vpcd(300)); + + // Insert broader prefix + table + .insert( + src_vpcd, + dst_vpcd1.clone(), + Prefix::from("10.0.0.0/16"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("20.0.0.0/16"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + // Insert more specific prefix + table + .insert( + src_vpcd, + dst_vpcd2.clone(), + Prefix::from("10.0.1.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + Prefix::from("20.0.1.0/24"), + OptionalPortRange::NoPortRangeMeansAllPorts, + ) + .unwrap(); + + // Should match the more specific prefix for source + let vpcd_result = table.lookup( + src_vpcd, + &"10.0.1.5".parse().unwrap(), + &"20.0.1.10".parse().unwrap(), + None, + ); + assert_eq!(vpcd_result, Some(dst_vpcd2)); + + // Should match the broader prefix for source + let vpcd_result = table.lookup( + src_vpcd, + &"10.0.2.5".parse().unwrap(), + &"20.0.2.10".parse().unwrap(), + None, + ); + assert_eq!(vpcd_result, Some(dst_vpcd1)); + } + + #[test] + fn test_flow_filter_table_no_src_vpcd() { + let table = FlowFilterTable::new(); + let src_vpcd = vpcd(999); // Non-existent VPC + + let vpcd_result = table.lookup( + src_vpcd, + &"10.0.0.1".parse().unwrap(), + &"20.0.0.1".parse().unwrap(), + None, + ); + assert!(vpcd_result.is_none()); + } +} diff --git a/lpm/src/prefix/mod.rs b/lpm/src/prefix/mod.rs index d25ab7744..6fa5ac168 100644 --- a/lpm/src/prefix/mod.rs +++ b/lpm/src/prefix/mod.rs @@ -297,6 +297,104 @@ impl Prefix { result } + /// Merge two contiguous prefixes, if possible + /// + /// # Returns + /// + /// - `Some(parent)` if the prefixes are contiguous and can be merged + /// - `None` if the prefixes are not contiguous or have different lengths + /// + /// # Example + /// + /// ```rust + /// # use dataplane_lpm::prefix::{Prefix, Ipv4Prefix, Ipv6Prefix}; + /// # use std::str::FromStr; + /// fn prefix_v4(s: &str) -> Prefix { + /// Prefix::from(Ipv4Prefix::from_str(s).unwrap()) + /// } + /// fn prefix_v6(s: &str) -> Prefix { + /// Prefix::from(Ipv6Prefix::from_str(s).unwrap()) + /// } + /// + /// let prefix1 = prefix_v4("1.0.1.0/25"); + /// let prefix2 = prefix_v4("1.0.1.128/25"); + /// assert_eq!( + /// prefix1.merge(&prefix2), + /// Some(prefix_v4("1.0.1.0/24")) + /// ); + /// + /// let prefix1 = prefix_v4("1.0.0.0/24"); + /// let prefix2 = prefix_v4("1.0.1.0/24"); + /// assert_eq!( + /// prefix1.merge(&prefix2), + /// Some(prefix_v4("1.0.0.0/23")) + /// ); + /// + /// let prefix1 = prefix_v4("1.0.0.0/16"); + /// let prefix2 = prefix_v4("1.0.1.0/24"); + /// assert_eq!( + /// prefix1.merge(&prefix2), + /// Some(prefix_v4("1.0.0.0/16")) + /// ); + /// + /// let prefix1 = prefix_v4("1.0.0.0/24"); + /// let prefix2 = prefix_v4("1.0.200.0/24"); + /// assert_eq!( + /// prefix1.merge(&prefix2), + /// None + /// ); + /// + /// // Contiguous but not forming a valid CIDR + /// let prefix1 = prefix_v4("1.0.1.0/24"); + /// let prefix2 = prefix_v4("1.0.2.0/24"); + /// assert_eq!( + /// prefix1.merge(&prefix2), + /// None + /// ); + /// + /// let prefix1 = 
prefix_v4("0.0.0.0/0"); + /// let prefix2 = prefix_v6("::/0"); + /// assert_eq!( + /// prefix1.merge(&prefix2), + /// None + /// ); + /// ``` + #[must_use] + pub fn merge(&self, other: &Self) -> Option { + if self.covers(other) { + return Some(*self); + } + if other.covers(self) { + return Some(*other); + } + if self.length() != other.length() { + return None; + } + if self.is_ipv4() != other.is_ipv4() { + return None; + } + + // We can't have 0-length, because both prefixes have equal length but are distinct and of + // different IP version. + debug_assert_ne!(self.length(), 0); + debug_assert_ne!(other.length(), 0); + + let parent = match self { + Prefix::IPV4(prefix) => { + Prefix::IPV4(Ipv4Prefix::new(prefix.network(), prefix.len() - 1).ok()?) + } + Prefix::IPV6(prefix) => { + Prefix::IPV6(Ipv6Prefix::new(prefix.network(), prefix.len() - 1).ok()?) + } + }; + if parent.covers(other) { + // The immediate parent CIDR covers both distinct prefixes, so we're good + Some(parent) + } else { + None + } + } + #[cfg(any(test, feature = "testing"))] #[allow(clippy::missing_panics_doc)] pub fn expect_from(val: T) -> Self diff --git a/lpm/src/prefix/range_map.rs b/lpm/src/prefix/range_map.rs index df5b7a2a4..2e5c6cf47 100644 --- a/lpm/src/prefix/range_map.rs +++ b/lpm/src/prefix/range_map.rs @@ -51,6 +51,10 @@ where self.0.get(range) } + pub fn get_mut(&mut self, range: &R) -> Option<&mut V> { + self.0.get_mut(range) + } + pub fn lookup(&self, key: &K) -> Option<(&R, &V)> where R: UpperBoundFrom + RangeBounds, @@ -81,6 +85,10 @@ where pub fn range_mut(&mut self, range: impl RangeBounds) -> impl Iterator { self.0.range_mut(range) } + + pub fn keys(&self) -> impl Iterator { + self.0.keys() + } } impl Default for DisjointRangesBTreeMap { diff --git a/lpm/src/prefix/with_ports.rs b/lpm/src/prefix/with_ports.rs index b3db61696..6aa1e798b 100644 --- a/lpm/src/prefix/with_ports.rs +++ b/lpm/src/prefix/with_ports.rs @@ -29,6 +29,10 @@ pub trait IpRangeWithPorts { Self: Sized; /// Returns the subtraction of the two ranges, if any. fn subtract(&self, other: &Self) -> Vec + where + Self: Sized; + /// Returns the merge of the two ranges, if any. + fn merge(&self, other: &Self) -> Option where Self: Sized; /// Returns the total number of (IP, port) combinations covered by the IP and port ranges. @@ -111,6 +115,22 @@ impl IpRangeWithPorts for PrefixWithPorts { } result } + + fn merge(&self, other: &Self) -> Option { + if self.prefix == other.prefix { + Some(PrefixWithPorts::new( + self.prefix, + self.ports.merge(other.ports)?, + )) + } else if self.ports == other.ports { + Some(PrefixWithPorts::new( + self.prefix.merge(&other.prefix)?, + self.ports, + )) + } else { + None + } + } } /// A structure containing a prefix and an optional port range. 
@@ -290,6 +310,44 @@ impl IpRangeWithPorts for PrefixWithOptionalPorts { ) => convert_result_type(self_prefix.subtract(other_prefix), false), } } + + fn merge(&self, other: &Self) -> Option { + match (self, other) { + ( + PrefixWithOptionalPorts::Prefix(self_prefix), + PrefixWithOptionalPorts::Prefix(other_prefix), + ) => self_prefix + .merge(other_prefix) + .map(PrefixWithOptionalPorts::Prefix), + ( + PrefixWithOptionalPorts::PrefixPorts(self_prefix_with_ports), + PrefixWithOptionalPorts::PrefixPorts(other_prefix_with_ports), + ) => self_prefix_with_ports + .merge(other_prefix_with_ports) + .map(PrefixWithOptionalPorts::PrefixPorts), + ( + PrefixWithOptionalPorts::PrefixPorts(prefix_with_ports), + PrefixWithOptionalPorts::Prefix(prefix), + ) + | ( + PrefixWithOptionalPorts::Prefix(prefix), + PrefixWithOptionalPorts::PrefixPorts(prefix_with_ports), + ) => { + if prefix_with_ports.prefix() == *prefix { + // Same IP prefix, and one of them covers all of the ports + Some(PrefixWithOptionalPorts::Prefix(*prefix)) + } else if prefix_with_ports.ports().is_max_range() { + // Same (full) port ranges, try merging the prefixes + prefix + .merge(&prefix_with_ports.prefix()) + .map(PrefixWithOptionalPorts::Prefix) + } else { + // Different IP ranges and ports, nothing we can do + None + } + } + } + } } /// Error type for [`PortRange`] operations. @@ -459,12 +517,12 @@ impl PortRange { Some(self.start + offset) } - /// Merges the given range into this range if possible. + /// Merges the given disjoint range into this range if possible. /// /// # Returns /// /// Returns `Some(())` if the ranges were merged, `None` otherwise. - pub fn merge(&mut self, next: PortRange) -> Option<()> { + pub fn extend_right(&mut self, next: PortRange) -> Option<()> { if self.start > next.start || self.end >= next.start { return None; } @@ -474,6 +532,18 @@ impl PortRange { } None } + + // Return a merged range if the two ranges overlap or are adjacent + #[must_use] + pub fn merge(&self, other: Self) -> Option { + let (left, right) = (self.min(&other), self.max(&other)); + if u32::from(left.end) + 1 < u32::from(right.start) { + None + } else { + // We know we have left.start <= right.end given that left <= right + Some(PortRange::new(left.start, right.end).unwrap_or_else(|_| unreachable!())) + } + } } // Used for DisjointRangesBTreeMap @@ -974,6 +1044,169 @@ mod tests { ); } + // PrefixWithOptionalPorts - merge + + #[test] + fn test_prefix_with_optional_ports_merge_both_prefix_adjacent() { + // Two adjacent prefixes without ports should merge + let pwop1 = PrefixWithOptionalPorts::new(prefix_v4("10.0.0.0/25"), None); + let pwop2 = PrefixWithOptionalPorts::new(prefix_v4("10.0.0.128/25"), None); + + let merged = pwop1.merge(&pwop2).expect("Should merge"); + assert_eq!(merged.prefix(), prefix_v4("10.0.0.0/24")); + assert_eq!(merged.ports(), None); + } + + #[test] + fn test_prefix_with_optional_ports_merge_both_prefix_not_adjacent() { + // Two non-adjacent prefixes without ports should not merge + let pwop1 = PrefixWithOptionalPorts::new(prefix_v4("10.0.0.0/24"), None); + let pwop2 = PrefixWithOptionalPorts::new(prefix_v4("10.0.2.0/24"), None); + + let merged = pwop1.merge(&pwop2); + assert!(merged.is_none()); + } + + #[test] + fn test_prefix_with_optional_ports_merge_both_prefix_identical() { + // Two identical prefixes without ports should merge + let pwop = PrefixWithOptionalPorts::new(prefix_v4("10.0.0.0/24"), None); + + let merged = pwop.merge(&pwop).expect("Should merge"); + assert_eq!(merged, pwop); + } + + #[test] + fn 
test_prefix_with_optional_ports_merge_both_prefix_ports_same_prefix() { + // Same prefix with adjacent port ranges should merge + let prefix = prefix_v4("10.0.0.0/24"); + let pwop1 = PrefixWithOptionalPorts::new(prefix, Some(PortRange::new(80, 100).unwrap())); + let pwop2 = PrefixWithOptionalPorts::new(prefix, Some(PortRange::new(101, 200).unwrap())); + + let merged = pwop1.merge(&pwop2).expect("Should merge"); + assert_eq!(merged.prefix(), prefix); + assert_eq!(merged.ports(), Some(PortRange::new(80, 200).unwrap())); + } + + #[test] + fn test_prefix_with_optional_ports_merge_both_prefix_ports_same_ports() { + // Adjacent prefixes with same port range should merge + let ports = PortRange::new(80, 100).unwrap(); + let pwop1 = PrefixWithOptionalPorts::new(prefix_v4("10.0.0.0/25"), Some(ports)); + let pwop2 = PrefixWithOptionalPorts::new(prefix_v4("10.0.0.128/25"), Some(ports)); + + let merged = pwop1.merge(&pwop2).expect("Should merge"); + assert_eq!(merged.prefix(), prefix_v4("10.0.0.0/24")); + assert_eq!(merged.ports(), Some(ports)); + } + + #[test] + fn test_prefix_with_optional_ports_merge_both_prefix_ports_different_both() { + // Different prefixes and different port ranges should not merge + let pwop1 = PrefixWithOptionalPorts::new( + prefix_v4("10.0.0.0/24"), + Some(PortRange::new(80, 100).unwrap()), + ); + let pwop2 = PrefixWithOptionalPorts::new( + prefix_v4("10.0.1.0/24"), + Some(PortRange::new(101, 300).unwrap()), + ); + + let merged = pwop1.merge(&pwop2); + assert!(merged.is_none()); + } + + #[test] + fn test_prefix_with_optional_ports_merge_both_prefix_ports_identical() { + // Two identical PrefixWithPorts should merge to themselves + let pwop = PrefixWithOptionalPorts::new( + prefix_v4("10.0.0.0/24"), + Some(PortRange::new(80, 100).unwrap()), + ); + + let merged = pwop.merge(&pwop).expect("Should merge"); + assert_eq!(merged, pwop); + } + + #[test] + fn test_prefix_with_optional_ports_merge_prefix_and_prefix_ports_same_prefix() { + // Same prefix, one with ports and one without (covers all ports) + // Should merge to prefix without ports (all ports) + let prefix = prefix_v4("10.0.0.0/24"); + let pwop1 = PrefixWithOptionalPorts::new(prefix, None); + let pwop2 = PrefixWithOptionalPorts::new(prefix, Some(PortRange::new(80, 100).unwrap())); + + let merged = pwop1.merge(&pwop2).expect("Should merge"); + assert_eq!(merged.prefix(), prefix); + assert_eq!(merged.ports(), None); + + // Test symmetry + let merged2 = pwop2.merge(&pwop1).expect("Should merge"); + assert_eq!(merged2, merged); + } + + #[test] + fn test_prefix_with_optional_ports_merge_prefix_and_prefix_ports_max_range() { + // Different prefixes but PrefixPorts has max port range (equivalent to no ports) + let pwop1 = PrefixWithOptionalPorts::new(prefix_v4("10.0.0.0/25"), None); + let pwop2 = PrefixWithOptionalPorts::new( + prefix_v4("10.0.0.128/25"), + Some(PortRange::new_max_range()), + ); + + let merged = pwop1.merge(&pwop2).expect("Should merge"); + assert_eq!(merged.prefix(), prefix_v4("10.0.0.0/24")); + assert_eq!(merged.ports(), None); + } + + #[test] + fn test_prefix_with_optional_ports_merge_prefix_and_prefix_ports_different_prefix_limited_ports() + { + // Different prefixes and PrefixPorts has limited port range + let pwop1 = PrefixWithOptionalPorts::new(prefix_v4("10.0.0.0/24"), None); + let pwop2 = PrefixWithOptionalPorts::new( + prefix_v4("10.0.1.0/24"), + Some(PortRange::new(80, 100).unwrap()), + ); + + let merged = pwop1.merge(&pwop2); + assert!(merged.is_none()); + } + + #[test] + fn 
test_prefix_with_optional_ports_merge_ipv6() { + // Test merging with IPv6 prefixes + let pwop1 = PrefixWithOptionalPorts::new(prefix_v6("2001:db8::/33"), None); + let pwop2 = PrefixWithOptionalPorts::new(prefix_v6("2001:db8:8000::/33"), None); + + let merged = pwop1.merge(&pwop2).expect("Should merge"); + assert_eq!(merged.prefix(), prefix_v6("2001:db8::/32")); + assert_eq!(merged.ports(), None); + } + + #[test] + fn test_prefix_with_optional_ports_merge_overlapping_port_ranges() { + // Overlapping (not just adjacent) port ranges should merge + let prefix = prefix_v4("10.0.0.0/24"); + let pwop1 = PrefixWithOptionalPorts::new(prefix, Some(PortRange::new(80, 150).unwrap())); + let pwop2 = PrefixWithOptionalPorts::new(prefix, Some(PortRange::new(100, 200).unwrap())); + + let merged = pwop1.merge(&pwop2).expect("Should merge"); + assert_eq!(merged.prefix(), prefix); + assert_eq!(merged.ports(), Some(PortRange::new(80, 200).unwrap())); + } + + #[test] + fn test_prefix_with_optional_ports_merge_non_adjacent_port_ranges() { + // Non-adjacent port ranges should not merge + let prefix = prefix_v4("10.0.0.0/24"); + let pwop1 = PrefixWithOptionalPorts::new(prefix, Some(PortRange::new(80, 100).unwrap())); + let pwop2 = PrefixWithOptionalPorts::new(prefix, Some(PortRange::new(200, 300).unwrap())); + + let merged = pwop1.merge(&pwop2); + assert!(merged.is_none()); + } + // PortRange - FromStr #[test] diff --git a/lpm/src/trie/ip_port_prefix_trie.rs b/lpm/src/trie/ip_port_prefix_trie.rs new file mode 100644 index 000000000..405119cd0 --- /dev/null +++ b/lpm/src/trie/ip_port_prefix_trie.rs @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! [`IpPortPrefixTrie`] is an [`IpPrefixTrie`] variant with support for port ranges. +//! +//! The struct provides a way to look up a tuple (IP address, port) in a trie of IP prefixes with +//! associated (optional) port ranges. + +use crate::prefix::Prefix; +use crate::trie::IpPrefixTrie; +use std::fmt::Debug; +use std::net::IpAddr; + +/// Trait for trie values to associate with IP prefixes. This values contain a port range. How the +/// port range is implemented does not matter, but it must implement the methods in this trait, for +/// the lookup to work correctly. +pub trait ValueWithAssociatedRanges { + // Return true if the port range in the value covers all existing port values. + // This is typically the case if the port range is optional and empty, in which case the IP + // prefix is assumed to apply to all ports. + fn covers_all_ports(&self) -> bool; + // Return true if the port range in the value covers the given port. + fn covers_port(&self, port: u16) -> bool; +} + +/// An [`IpPrefixTrie`] variant with support for port ranges, for disjoint combinations of IP +/// prefixes and port ranges. +/// +/// The struct provides a way to look up a tuple (IP address, port) in a trie of IP prefixes with +/// associated (optional) port ranges. +/// +/// Internally, it is a LPM trie with IP prefixes as keys. Each key is associated with a value that +/// contains a port range. The lookup is more complex than a simple LPM lookup: we need to find the +/// prefix, but also the port range associated to an (IP address, port) tuple. All prefixes with +/// their associated port range are disjoint, but we can have colliding or identical prefixes, with +/// disjoint port ranges. 
So the lookup works this way: +/// +/// - Iterate over all IP prefixes matching the given IP address +/// - For each matching prefix, check if the port range associated with it covers the given port +/// - Return the first match we find: as the combinations (IP prefix, port range) are disjoint, +/// there can be no more than one match. +#[derive(Debug, Clone)] +pub struct IpPortPrefixTrie(IpPrefixTrie) +where + V: Debug + Clone + ValueWithAssociatedRanges; + +impl IpPortPrefixTrie +where + V: Debug + Clone + ValueWithAssociatedRanges, +{ + /// Create a new empty [`IpPortPrefixTrie`]. + #[must_use] + pub fn new() -> Self { + Self(IpPrefixTrie::new()) + } + + /// Create a new [`IpPortPrefixTrie`] with a single prefix and value. + #[must_use] + pub fn from(prefix: Prefix, value: V) -> Self { + let mut trie = Self::new(); + trie.0.insert(prefix, value); + trie + } + + /// Insert a prefix and value into the trie. + pub fn insert(&mut self, prefix: Prefix, value: V) { + self.0.insert(prefix, value); + } + + /// Get a mutable reference to the value associated with a prefix. + pub fn get_mut(&mut self, prefix: Prefix) -> Option<&mut V> { + self.0.get_mut(prefix) + } + + /// Look up an IP address and optional port in the trie. + /// + /// Returns the longest matching prefix and its associated value, if any. + /// + /// See the documentation of [`IpPortPrefixTrie`] for details on the lookup logic. + pub fn lookup(&self, addr: &IpAddr, port_opt: Option) -> Option<(Prefix, &V)> { + // If the longest matching prefix has no associated port range, we assume it matches any + // port, so the lookup is successful + if let Some((prefix, value)) = self.0.lookup(*addr) + && value.covers_all_ports() + { + return Some((prefix, value)); + } + + // Else, we need to check all matching IP prefixes (not necessarily the longest), and their + // port ranges. We expect the trie to contain only one matching IP prefix matching the + // address and associated to a port range matching the port, so we return the first we find. 
+ let port = port_opt?; + let matching_entries = self.0.matching_entries(*addr); + for (prefix, value) in matching_entries { + if value.covers_port(port) { + return Some((prefix, value)); + } + } + None + } +} + +impl Default for IpPortPrefixTrie +where + V: Debug + Clone + ValueWithAssociatedRanges, +{ + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::prefix::{PortRange, Prefix}; + use std::collections::BTreeSet; + use std::ops::RangeBounds; + + #[derive(Debug, Clone)] + enum TestValue { + AnyPort, + Ranges(BTreeSet), + } + + impl ValueWithAssociatedRanges for TestValue { + fn covers_all_ports(&self) -> bool { + match self { + TestValue::AnyPort => true, + TestValue::Ranges(ranges) => { + ranges.iter().fold(0, |sum, range| sum + range.len()) == PortRange::MAX_LENGTH + } + } + } + + fn covers_port(&self, port: u16) -> bool { + match self { + TestValue::AnyPort => true, + TestValue::Ranges(ranges) => ranges.iter().any(|range| range.contains(&port)), + } + } + } + + #[test] + fn test_new() { + let trie: IpPortPrefixTrie = IpPortPrefixTrie::new(); + assert!(trie.lookup(&"192.168.1.1".parse().unwrap(), None).is_none()); + } + + #[test] + fn test_from() { + let prefix = Prefix::from("192.168.1.0/24"); + let value = TestValue::AnyPort; + let trie = IpPortPrefixTrie::from(prefix, value); + + let result = trie.lookup(&"192.168.1.5".parse().unwrap(), None); + assert!(result.is_some()); + let (matched_prefix, _) = result.unwrap(); + assert_eq!(matched_prefix, prefix); + } + + #[test] + fn test_insert_and_lookup_any_port() { + let mut trie = IpPortPrefixTrie::new(); + let prefix = Prefix::from("10.0.0.0/16"); + let value = TestValue::AnyPort; + + trie.insert(prefix, value); + + // Should match with any port + let result = trie.lookup(&"10.0.1.5".parse().unwrap(), Some(80)); + assert!(result.is_some()); + let (matched_prefix, matched_value) = result.unwrap(); + assert_eq!(matched_prefix, prefix); + assert!(matches!(matched_value, TestValue::AnyPort)); + + // Should match without port + let result = trie.lookup(&"10.0.1.5".parse().unwrap(), None); + assert!(result.is_some()); + } + + #[test] + fn test_insert_and_lookup_with_port_ranges() { + let mut trie = IpPortPrefixTrie::new(); + let prefix = Prefix::from("172.16.0.0/12"); + let ranges = BTreeSet::from([PortRange::new(80, 90).unwrap()]); + let value = TestValue::Ranges(ranges); + + trie.insert(prefix, value); + + // Should match port in range + let result = trie.lookup(&"172.16.5.10".parse().unwrap(), Some(85)); + assert!(result.is_some()); + let (matched_prefix, _) = result.unwrap(); + assert_eq!(matched_prefix, prefix); + + // Should not match port outside range + let result = trie.lookup(&"172.16.5.10".parse().unwrap(), Some(100)); + assert!(result.is_none()); + + // Should not match without port + let result = trie.lookup(&"172.16.5.10".parse().unwrap(), None); + assert!(result.is_none()); + } + + #[test] + fn test_lookup_longest_prefix_match_no_ports() { + let mut trie = IpPortPrefixTrie::new(); + + // Insert prefix with port range + let prefix_with_ports = Prefix::from("192.168.0.0/24"); + let ranges = BTreeSet::from([PortRange::new(80, 90).unwrap()]); + trie.insert(prefix_with_ports, TestValue::Ranges(ranges)); + + // Insert prefix covering all ports + let prefix_alone = Prefix::from("192.168.1.0/24"); + trie.insert(prefix_alone, TestValue::AnyPort); + + // Match wihout port + let result = trie.lookup(&"192.168.1.5".parse().unwrap(), None); + assert!(result.is_some()); + let 
(matched_prefix, _) = result.unwrap(); + assert_eq!(matched_prefix, prefix_alone); + + // Match with a port + let result = trie.lookup(&"192.168.1.5".parse().unwrap(), Some(443)); + assert!(result.is_some()); + let (matched_prefix, _) = result.unwrap(); + assert_eq!(matched_prefix, prefix_alone); + + // Fail to match prefix_with_ports without a port + let result = trie.lookup(&"192.168.0.5".parse().unwrap(), None); + assert!(result.is_none()); + + // Match with a port + let result = trie.lookup(&"192.168.0.5".parse().unwrap(), Some(80)); + assert!(result.is_some()); + let (matched_prefix, _) = result.unwrap(); + assert_eq!(matched_prefix, prefix_with_ports); + } + + #[test] + fn test_lookup_longest_prefix_match_with_ports() { + let mut trie = IpPortPrefixTrie::new(); + + // Insert broader prefix + let prefix_16 = Prefix::from("192.168.0.0/16"); + let ranges = BTreeSet::from([PortRange::new(80, 90).unwrap()]); + trie.insert(prefix_16, TestValue::Ranges(ranges)); + + // Insert more specific prefix + let prefix_24 = Prefix::from("192.168.1.0/24"); + let ranges = BTreeSet::from([PortRange::new(443, 443).unwrap()]); + trie.insert(prefix_24, TestValue::Ranges(ranges)); + + // Without port, there is not match + let result = trie.lookup(&"192.168.1.5".parse().unwrap(), None); + assert!(result.is_none()); + + // Based on port, we match the more specific prefix + let result = trie.lookup(&"192.168.1.5".parse().unwrap(), Some(443)); + assert!(result.is_some()); + let (matched_prefix, _) = result.unwrap(); + assert_eq!(matched_prefix, prefix_24); + + // Based on port, we match the broader prefix + let result = trie.lookup(&"192.168.1.5".parse().unwrap(), Some(80)); + assert!(result.is_some()); + let (matched_prefix, _) = result.unwrap(); + assert_eq!(matched_prefix, prefix_16); + } + + #[test] + fn test_get_mut() { + let mut trie = IpPortPrefixTrie::new(); + let prefix = Prefix::from("203.0.113.0/24"); + let ranges = BTreeSet::from([PortRange::new(8080, 8090).unwrap()]); + trie.insert(prefix, TestValue::Ranges(ranges)); + + // Modify the value + if let Some(value) = trie.get_mut(prefix) { + *value = TestValue::AnyPort; + } + + // Should now match with any port + let result = trie.lookup(&"203.0.113.5".parse().unwrap(), Some(9999)); + assert!(result.is_some()); + let (_, matched_value) = result.unwrap(); + assert!(matches!(matched_value, TestValue::AnyPort)); + } + + #[test] + fn test_ipv6_lookup() { + let mut trie = IpPortPrefixTrie::new(); + let prefix = Prefix::from("2001:db8::/32"); + trie.insert(prefix, TestValue::AnyPort); + + let result = trie.lookup(&"2001:db8::1".parse().unwrap(), None); + assert!(result.is_some()); + let (matched_prefix, _) = result.unwrap(); + assert_eq!(matched_prefix, prefix); + + let result = trie.lookup(&"2001:db9::1".parse().unwrap(), None); + assert!(result.is_none()); + } + + #[test] + fn test_covers_all_ports() { + let any_port = TestValue::AnyPort; + assert!(any_port.covers_all_ports()); + + let mut ranges = BTreeSet::new(); + ranges.insert(PortRange::new(0, 32767).unwrap()); + ranges.insert(PortRange::new(32768, 65535).unwrap()); + let full_range = TestValue::Ranges(ranges); + assert!(full_range.covers_all_ports()); + + let partial_ranges = BTreeSet::from([PortRange::new(80, 443).unwrap()]); + let partial_range = TestValue::Ranges(partial_ranges); + assert!(!partial_range.covers_all_ports()); + } + + #[test] + fn test_covers_port() { + let any_port = TestValue::AnyPort; + assert!(any_port.covers_port(80)); + assert!(any_port.covers_port(65535)); + + let mut ranges 
= BTreeSet::new(); + ranges.insert(PortRange::new(80, 80).unwrap()); + ranges.insert(PortRange::new(443, 443).unwrap()); + let specific_ports = TestValue::Ranges(ranges); + assert!(specific_ports.covers_port(80)); + assert!(specific_ports.covers_port(443)); + assert!(!specific_ports.covers_port(8080)); + } +} diff --git a/lpm/src/trie/mod.rs b/lpm/src/trie/mod.rs index eef40f015..bfe3767ce 100644 --- a/lpm/src/trie/mod.rs +++ b/lpm/src/trie/mod.rs @@ -5,6 +5,9 @@ use crate::prefix::{IpPrefix, Ipv4Prefix, Ipv6Prefix, Prefix}; use std::borrow::Borrow; use std::net::IpAddr; +mod ip_port_prefix_trie; +pub use ip_port_prefix_trie::{IpPortPrefixTrie, ValueWithAssociatedRanges}; + mod prefix_map_impl; pub use prefix_map_impl::*; @@ -101,6 +104,16 @@ impl IpPrefixTrie { } } + pub fn get_mut(&mut self, prefix: Q) -> Option<&mut V> + where + Q: Into, + { + match prefix.into() { + Prefix::IPV4(prefix) => self.ipv4.get_mut(prefix), + Prefix::IPV6(prefix) => self.ipv6.get_mut(prefix), + } + } + pub fn matching_entries(&self, addr: Q) -> Box + '_> where Q: Into, diff --git a/mgmt/Cargo.toml b/mgmt/Cargo.toml index d846d7324..5ef11bd7f 100644 --- a/mgmt/Cargo.toml +++ b/mgmt/Cargo.toml @@ -20,6 +20,7 @@ bolero = ["dep:bolero", "interface-manager/bolero", "id/bolero", "net/bolero", " args = { workspace = true } config = { workspace = true } concurrency = { workspace = true } +flow-filter = { workspace = true } id = { workspace = true } interface-manager = { workspace = true } k8s-intf = { workspace = true } @@ -27,7 +28,6 @@ k8s-less = { workspace = true } lpm = { workspace = true } nat = { workspace = true } net = { workspace = true } -pkt-meta = { workspace = true } rekon = { workspace = true } routing = { workspace = true } stats = { workspace = true } diff --git a/mgmt/src/processor/proc.rs b/mgmt/src/processor/proc.rs index 0aa4fb1df..d936feb59 100644 --- a/mgmt/src/processor/proc.rs +++ b/mgmt/src/processor/proc.rs @@ -19,11 +19,10 @@ use config::{external::overlay::Overlay, internal::device::tracecfg::TracingConf use crate::processor::confbuild::internal::build_internal_config; use crate::processor::confbuild::router::generate_router_config; +use flow_filter::{FlowFilterTable, FlowFilterTableWriter}; use nat::stateful::NatAllocatorWriter; use nat::stateless::NatTablesWriter; use nat::stateless::setup::build_nat_configuration; -use pkt_meta::dst_vpcd_lookup::VpcDiscTablesWriter; -use pkt_meta::dst_vpcd_lookup::setup::build_dst_vni_lookup_configuration; use crate::processor::display::ConfigHistory; use crate::processor::gwconfigdb::GwConfigDatabase; @@ -81,8 +80,8 @@ pub struct ConfigProcessorParams { // writer for stateful NAT allocator pub natallocatorw: NatAllocatorWriter, - // writer for VPC routing table - pub vpcdtablesw: VpcDiscTablesWriter, + // writer for flow filter table + pub flowfilterw: FlowFilterTableWriter, // store for vpc stats pub vpc_stats_store: Arc, @@ -482,13 +481,12 @@ fn apply_stateful_nat_config( Ok(()) } -/// Update the VNI tables for dst_vni_lookup -fn apply_dst_vpcd_lookup_config( +fn apply_flow_filtering_config( overlay: &Overlay, - vpcdtablesw: &mut VpcDiscTablesWriter, + flowfilterw: &mut FlowFilterTableWriter, ) -> ConfigResult { - let vpcd_tables = build_dst_vni_lookup_configuration(overlay)?; - vpcdtablesw.update_vpcd_tables(vpcd_tables); + let flow_filter_table = FlowFilterTable::build_from_overlay(overlay)?; + flowfilterw.update_flow_filter_table(flow_filter_table); Ok(()) } @@ -522,7 +520,7 @@ impl ConfigProcessor { let vpcmapw = &mut self.proc_params.vpcmapw; 
let nattablesw = &mut self.proc_params.nattablesw; let natallocatorw = &mut self.proc_params.natallocatorw; - let vpcdtablesw = &mut self.proc_params.vpcdtablesw; + let flowfilterw = &mut self.proc_params.flowfilterw; /* build internal config if it hasn't been built */ if config.internal.is_none() { @@ -559,8 +557,8 @@ impl ConfigProcessor { /* apply stateful NAT config */ apply_stateful_nat_config(&config.external.overlay.vpc_table, natallocatorw)?; - /* apply dst_vpcd_lookup config */ - apply_dst_vpcd_lookup_config(&config.external.overlay, vpcdtablesw)?; + /* apply flow filtering config */ + apply_flow_filtering_config(&config.external.overlay, flowfilterw)?; /* update stats mappings and seed names to the stats store */ let _ = update_stats_vpc_mappings(config, vpcmapw); diff --git a/mgmt/src/tests/mgmt.rs b/mgmt/src/tests/mgmt.rs index 33337f4b8..f3f539816 100644 --- a/mgmt/src/tests/mgmt.rs +++ b/mgmt/src/tests/mgmt.rs @@ -11,12 +11,12 @@ pub mod test { use config::external::gwgroup::GwGroupTable; use fixin::wrap; + use flow_filter::FlowFilterTableWriter; use lpm::prefix::Prefix; use nat::stateful::NatAllocatorWriter; use nat::stateless::NatTablesWriter; use net::eth::mac::Mac; use net::interface::Mtu; - use pkt_meta::dst_vpcd_lookup::VpcDiscTablesWriter; use std::net::IpAddr; use std::net::Ipv4Addr; use std::str::FromStr; @@ -422,16 +422,16 @@ pub mod test { /* vpcmappings for vpc name resolution for vpc stats */ let vpcmapw = VpcMapWriter::::new(); - /* crate NatTables for stateless nat */ + /* create NatTables for stateless nat */ let nattablesw = NatTablesWriter::new(); - /* crate NatAllocator for stateful nat */ + /* create NatAllocator for stateful nat */ let natallocatorw = NatAllocatorWriter::new(); - /* crate VniTables for dst_vni_lookup */ - let vpcdtablesw = VpcDiscTablesWriter::new(); + /* create FlowFilterTable for flow filtering */ + let flowfilterw = FlowFilterTableWriter::new(); - /* NEW: VPC stats store (Arc) */ + /* create VPC stats store (Arc) */ let vpc_stats_store = VpcStatsStore::new(); /* build configuration of mgmt config processor */ @@ -440,7 +440,7 @@ pub mod test { vpcmapw, nattablesw, natallocatorw, - vpcdtablesw, + flowfilterw, vpc_stats_store, }; diff --git a/nat/Cargo.toml b/nat/Cargo.toml index bd20cc35c..9fac71863 100644 --- a/nat/Cargo.toml +++ b/nat/Cargo.toml @@ -14,6 +14,7 @@ arc-swap = { workspace = true } bnum = { workspace = true } concurrency = { workspace = true, features = [] } config = { workspace = true } +flow-filter = { workspace = true } flow-info = { workspace = true } left-right = { workspace = true } linkme = { workspace = true } diff --git a/nat/src/stateful/test.rs b/nat/src/stateful/test.rs index 6ad0da72f..122fad873 100644 --- a/nat/src/stateful/test.rs +++ b/nat/src/stateful/test.rs @@ -23,6 +23,7 @@ mod tests { use config::{ConfigError, GwConfig}; use etherparse::Icmpv4Type; use fixin::wrap; + use flow_filter::{FlowFilter, FlowFilterTable, FlowFilterTableWriter}; use net::buffer::{PacketBufferMut, TestBuffer}; use net::eth::mac::Mac; use net::headers::{ @@ -39,8 +40,6 @@ mod tests { use net::udp::{TruncatedUdp, UdpPort}; use net::vxlan::Vni; use pipeline::NetworkFunction; - use pkt_meta::dst_vpcd_lookup::setup::build_dst_vni_lookup_configuration; - use pkt_meta::dst_vpcd_lookup::{DstVpcdLookup, VpcDiscTablesWriter}; use pkt_meta::flow_table::flow_key::Uni; use pkt_meta::flow_table::{FlowKey, FlowTable, IpProtoKey, LookupNF, UdpProtoKey}; use std::net::{IpAddr, Ipv4Addr}; @@ -1175,7 +1174,7 @@ mod tests { 
#[allow(clippy::too_many_arguments)] fn check_packet_with_vpcd_lookup( nat: &mut StatefulNat, - vpcdlookup: &mut DstVpcdLookup, + vpcdlookup: &mut FlowFilter, flow_lookup_stage: Option<&mut LookupNF>, src_vni: Vni, src_ip: &str, @@ -1254,10 +1253,10 @@ mod tests { config.validate().unwrap(); // Build VPC discriminant lookup stage - let vpcd_tables = build_dst_vni_lookup_configuration(&config.external.overlay).unwrap(); - let mut vpcdtablesw = VpcDiscTablesWriter::new(); - vpcdtablesw.update_vpcd_tables(vpcd_tables); - let mut vpcdlookup = DstVpcdLookup::new("vpcd-lookup", vpcdtablesw.get_reader()); + let vpcd_tables = FlowFilterTable::build_from_overlay(&config.external.overlay).unwrap(); + let mut vpcdtablesw = FlowFilterTableWriter::new(); + vpcdtablesw.update_flow_filter_table(vpcd_tables); + let mut vpcdlookup = FlowFilter::new("vpcd-lookup", vpcdtablesw.get_reader()); ///////////////////////////////////////////////////////////////// // First NAT stage: We do not search for the destination VPC discriminant in the flow table. @@ -1551,10 +1550,10 @@ mod tests { ); // Build VPC discriminant lookup stage - let vpcd_tables = build_dst_vni_lookup_configuration(&config.external.overlay).unwrap(); - let mut vpcdtablesw = VpcDiscTablesWriter::new(); - vpcdtablesw.update_vpcd_tables(vpcd_tables); - let mut vpcdlookup = DstVpcdLookup::new("vpcd-lookup", vpcdtablesw.get_reader()); + let vpcd_tables = FlowFilterTable::build_from_overlay(&config.external.overlay).unwrap(); + let mut vpcdtablesw = FlowFilterTableWriter::new(); + vpcdtablesw.update_flow_filter_table(vpcd_tables); + let mut vpcdlookup = FlowFilter::new("vpcd-lookup", vpcdtablesw.get_reader()); // Build flow table lookup stage let flow_table = Arc::new(FlowTable::default()); diff --git a/nat/src/stateless/setup/mod.rs b/nat/src/stateless/setup/mod.rs index b9b7b39ff..59931038c 100644 --- a/nat/src/stateless/setup/mod.rs +++ b/nat/src/stateless/setup/mod.rs @@ -79,7 +79,7 @@ impl PerVniTable { let (prefix, value) = res?; // It's OK if the prefix already exists in the trie, we may try to insert it // multiple times if we have disjoint port ranges for this prefix. - let _ = peering_table.insert(prefix, value); + peering_table.insert(prefix, value); Ok(()) }) })?; @@ -94,7 +94,7 @@ impl PerVniTable { let (prefix, value) = res?; // It's OK if the prefix already exists in the trie, we may try to insert it // multiple times if we have disjoint port ranges for this prefix. - let _ = self.dst_nat.insert(prefix, value); + self.dst_nat.insert(prefix, value); Ok(()) }) })?; diff --git a/nat/src/stateless/setup/tables.rs b/nat/src/stateless/setup/tables.rs index 203e76aa8..beca96ae8 100644 --- a/nat/src/stateless/setup/tables.rs +++ b/nat/src/stateless/setup/tables.rs @@ -5,7 +5,7 @@ use ahash::RandomState; use bnum::cast::CastFrom; use lpm::prefix::range_map::{DisjointRangesBTreeMap, UpperBoundFrom}; use lpm::prefix::{IpPrefix, IpRangeWithPorts, PortRange, Prefix, PrefixSize, PrefixWithPortsSize}; -use lpm::trie::IpPrefixTrie; +use lpm::trie::{IpPortPrefixTrie, ValueWithAssociatedRanges}; use net::vxlan::Vni; use std::collections::{BTreeSet, HashMap}; use std::fmt::Debug; @@ -166,22 +166,18 @@ fn addr_offset_in_prefix_with_ports( /// From a current address prefix, find the target address prefix. 
#[derive(Debug, Default, Clone)] -pub struct NatRuleTable(IpPrefixTrie); +pub struct NatRuleTable(IpPortPrefixTrie); impl NatRuleTable { #[must_use] /// Creates a new empty [`NatRuleTable`] pub fn new() -> Self { - Self(IpPrefixTrie::new()) + Self(IpPortPrefixTrie::new()) } /// Inserts a new entry in the table - /// - /// # Returns - /// - /// Returns the previous value associated with the prefix if it existed, or `None` otherwise. - pub fn insert(&mut self, prefix: Prefix, value: NatTableValue) -> Option { - self.0.insert(prefix, value) + pub fn insert(&mut self, prefix: Prefix, value: NatTableValue) { + self.0.insert(prefix, value); } /// Looks up for the value associated with the given address. @@ -192,28 +188,7 @@ impl NatRuleTable { /// If the address does not match any prefix, it returns `None`. #[must_use] pub fn lookup(&self, addr: &IpAddr, port_opt: Option) -> Option<(Prefix, &NatTableValue)> { - // If we have a matching NatTableValue::Nat for the address, return it - let result = self.0.lookup(*addr); - if matches!(result, Some((_prefix, NatTableValue::Nat(_value)))) { - return result; - } - - // Else, we need to check all matching IP prefixes (not necessarily the longest), and their - // port ranges. We expect the trie to contain only one matching IP prefix matching the - // address and associated to a port range matching the port, so we return the first we find. - let port = port_opt?; - let matching_entries = self.0.matching_entries(*addr); - for (prefix, value) in matching_entries { - if let NatTableValue::Pat(pat_value) = value - && pat_value - .prefix_port_ranges - .iter() - .any(|pr| pr.contains(&port)) - { - return Some((prefix, value)); - } - } - None + self.0.lookup(addr, port_opt) } } @@ -227,6 +202,30 @@ pub enum NatTableValue { Pat(PortAddrTranslationValue), } +impl ValueWithAssociatedRanges for NatTableValue { + fn covers_all_ports(&self) -> bool { + match self { + NatTableValue::Nat(_) => true, + NatTableValue::Pat(value) => { + value + .prefix_port_ranges + .iter() + .fold(0, |sum, range| sum + range.len()) + == PortRange::MAX_LENGTH + } + } + } + + fn covers_port(&self, port: u16) -> bool { + match self { + NatTableValue::Nat(_) => true, + NatTableValue::Pat(value) => { + value.prefix_port_ranges.iter().any(|pr| pr.contains(&port)) + } + } + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct AddrTranslationValue { ranges_tree: DisjointRangesBTreeMap, @@ -355,7 +354,7 @@ impl PortAddrTranslationValue { .iter() .find(|pr| pr.contains(&key.start.port)) .unwrap_or_else(|| unreachable!()), - ) && merge_value.0.merge(&value.0).is_some() + ) && merge_value.0.extend_right(&value.0).is_some() { // Merge was successful, insert new value self.ranges_tree.insert(merge_key, merge_value); @@ -561,7 +560,7 @@ impl IpPortRange { // # Returns // // Returns `Some(())` if the ranges were merged, or `None` otherwise. - fn merge(&mut self, next: &IpPortRange) -> Option<()> { + fn extend_right(&mut self, next: &IpPortRange) -> Option<()> { // Always merge on the "right side". 
This is because we call this method assuming that // ranges are ordered (by IP range start, then port range start values), and we process the // smaller ones first; if we try to merge a new one into an existing one, it's a "bigger" @@ -569,12 +568,12 @@ impl IpPortRange { // Case 1: port ranges are identical if self.port_range == next.port_range { - return self.ip_range.merge(&next.ip_range); + return self.ip_range.extend_right(&next.ip_range); } // Case 2: IP ranges are identical if self.ip_range == next.ip_range { - return self.port_range.merge(next.port_range); + return self.port_range.extend_right(next.port_range); } None } @@ -606,6 +605,10 @@ impl IpRangeWithPorts for IpPortRange { fn subtract(&self, _other: &Self) -> Vec { unimplemented!() } + + fn merge(&self, _other: &Self) -> Option { + unimplemented!() + } } // Represents an IP address range, with a start and an end address. @@ -676,7 +679,7 @@ impl IpRange { // # Returns // // Returns `Some(())` if the ranges were merged, or `None` otherwise. - fn merge(&mut self, next: &IpRange) -> Option<()> { + fn extend_right(&mut self, next: &IpRange) -> Option<()> { // Always merge on the "right side". This is because we call this method when processing // ranges obtained from prefixes in a BTreeSet, so they are ordered, and we process the // smaller ones first; if we try to merge a new one into an existing one, it's a "bigger" diff --git a/pkt-meta/src/dst_vpcd_lookup/mod.rs b/pkt-meta/src/dst_vpcd_lookup/mod.rs deleted file mode 100644 index 9fd6ffe1f..000000000 --- a/pkt-meta/src/dst_vpcd_lookup/mod.rs +++ /dev/null @@ -1,494 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// Copyright Open Network Fabric Authors - -use left_right::{Absorb, ReadGuard, ReadHandle, ReadHandleFactory, WriteHandle, new_from_empty}; -use std::collections::HashMap; -use tracing::{debug, error, warn}; - -use lpm::prefix::range_map::DisjointRangesBTreeMap; -use lpm::prefix::{PortRange, Prefix, PrefixWithOptionalPorts}; -use lpm::trie::IpPrefixTrie; -use net::buffer::PacketBufferMut; -use net::headers::{TryHeaders, TryIp, TryTransport}; -use net::packet::{DoneReason, Packet, VpcDiscriminant}; -use pipeline::NetworkFunction; -use std::net::IpAddr; -use std::num::NonZero; - -pub mod setup; - -use tracectl::trace_target; -trace_target!("vpc-routing", LevelFilter::INFO, &["pipeline"]); - -#[derive(thiserror::Error, Debug, Clone)] -pub enum DstVpcdLookupError { - #[error("Error building dst_vpcd_lookup table: {0}")] - BuildError(String), -} - -#[derive(Debug, Clone)] -pub struct VpcDiscriminantTables { - tables_by_discriminant: HashMap, -} - -impl VpcDiscriminantTables { - #[must_use] - pub fn new() -> Self { - Self { - tables_by_discriminant: HashMap::new(), - } - } -} - -impl Default for VpcDiscriminantTables { - fn default() -> Self { - Self::new() - } -} - -#[derive(Debug)] -enum VpcDiscriminantTablesChange { - UpdateVpcDiscTables(VpcDiscriminantTables), -} - -impl Absorb for VpcDiscriminantTables { - fn absorb_first(&mut self, change: &mut VpcDiscriminantTablesChange, _: &Self) { - match change { - VpcDiscriminantTablesChange::UpdateVpcDiscTables(vpcd_tables) => { - *self = vpcd_tables.clone(); - } - } - } - fn drop_first(self: Box) {} - fn sync_with(&mut self, first: &Self) { - *self = first.clone(); - } -} - -#[derive(Debug)] -pub struct VpcDiscTablesReader(ReadHandle); -impl VpcDiscTablesReader { - fn enter(&self) -> Option> { - self.0.enter() - } - - #[must_use] - pub fn factory(&self) -> VpcDiscTablesReaderFactory { - 
VpcDiscTablesReaderFactory(self.0.factory()) - } -} - -#[derive(Debug)] -pub struct VpcDiscTablesReaderFactory(ReadHandleFactory); -impl VpcDiscTablesReaderFactory { - #[must_use] - pub fn handle(&self) -> VpcDiscTablesReader { - VpcDiscTablesReader(self.0.handle()) - } -} - -#[derive(Debug)] -pub struct VpcDiscTablesWriter(WriteHandle); -impl VpcDiscTablesWriter { - #[must_use] - #[allow(clippy::new_without_default)] - pub fn new() -> VpcDiscTablesWriter { - let (w, _r) = new_from_empty::( - VpcDiscriminantTables::new(), - ); - VpcDiscTablesWriter(w) - } - #[must_use] - pub fn get_reader(&self) -> VpcDiscTablesReader { - VpcDiscTablesReader(self.0.clone()) - } - - pub fn get_reader_factory(&self) -> VpcDiscTablesReaderFactory { - self.get_reader().factory() - } - - pub fn update_vpcd_tables(&mut self, vpcd_tables: VpcDiscriminantTables) { - self.0 - .append(VpcDiscriminantTablesChange::UpdateVpcDiscTables( - vpcd_tables, - )); - self.0.publish(); - debug!("Updated tables for Destination vpcd Lookup"); - } -} - -#[derive(Debug, Clone)] -enum VpcDiscriminantTableValue { - PortRangeTable(DisjointRangesBTreeMap>), - NoPorts(Option), -} - -#[derive(Debug, Clone)] -struct VpcDiscriminantTable { - dst_vpcds: IpPrefixTrie, -} - -impl VpcDiscriminantTable { - fn new() -> Self { - Self { - dst_vpcds: IpPrefixTrie::new(), - } - } - - fn lookup( - &self, - addr: IpAddr, - port_opt: Option, - ) -> Option<(PrefixWithOptionalPorts, &Option)> { - self.dst_vpcds - .lookup(addr) - .and_then(|(prefix, value)| match value { - VpcDiscriminantTableValue::PortRangeTable(port_ranges_table) => { - let port = port_opt?; - port_ranges_table.lookup(&port).map(|(port_range, vpcd)| { - ( - PrefixWithOptionalPorts::new(prefix, Some(*port_range)), - vpcd, - ) - }) - } - VpcDiscriminantTableValue::NoPorts(vpcd) => Some((prefix.into(), vpcd)), - }) - } - - fn insert( - &mut self, - prefix: PrefixWithOptionalPorts, - value: Option, - ) -> Option { - let (table_prefix, table_value) = match prefix.ports() { - Some(ports) => { - let mut port_ranges_table = DisjointRangesBTreeMap::new(); - port_ranges_table.insert(ports, value); - ( - prefix.prefix(), - VpcDiscriminantTableValue::PortRangeTable(port_ranges_table), - ) - } - None => (prefix.prefix(), VpcDiscriminantTableValue::NoPorts(value)), - }; - self.dst_vpcds.insert(table_prefix, table_value) - } - - fn iter_for_prefix( - &self, - reference_prefix: &Prefix, - ) -> Box)> + '_> { - Box::new( - self.dst_vpcds.iter_for_prefix(reference_prefix).flat_map( - |(prefix, value)| match value { - VpcDiscriminantTableValue::PortRangeTable(port_ranges_table) => { - Box::new(port_ranges_table.iter().map(move |(port_range, vpcd)| { - ( - PrefixWithOptionalPorts::new(prefix, Some(*port_range)), - vpcd, - ) - })) - as Box< - dyn Iterator< - Item = (PrefixWithOptionalPorts, &Option), - >, - > - } - VpcDiscriminantTableValue::NoPorts(vpcd) => Box::new(std::iter::once(( - PrefixWithOptionalPorts::new(prefix, None), - vpcd, - ))) - as Box< - dyn Iterator< - Item = (PrefixWithOptionalPorts, &Option), - >, - >, - }, - ), - ) - } -} - -impl Default for VpcDiscriminantTable { - fn default() -> Self { - Self::new() - } -} - -pub struct DstVpcdLookup { - name: String, - tablesr: VpcDiscTablesReader, -} - -impl DstVpcdLookup { - pub fn new(name: &str, tablesr: VpcDiscTablesReader) -> Self { - Self { - name: name.to_string(), - tablesr, - } - } - - fn process_packet( - &self, - tablesr: &ReadGuard<'_, VpcDiscriminantTables>, - packet: &mut Packet, - ) { - let nfi = &self.name; - if 
packet.meta.dst_vpcd.is_some() { - debug!("{nfi}: Packet already has dst_vpcd: skipping"); - return; - } - let Some(net) = packet.headers().try_ip() else { - warn!("{nfi}: Packet has no Ip headers: can't look up dst_vpcd"); - packet.done(DoneReason::NotIp); - return; - }; - let Some(src_vpcd) = packet.meta.src_vpcd else { - warn!("{nfi}: Packet does not have src vpcd: marking as unroutable"); - packet.done(DoneReason::Unroutable); - return; - }; - let dst_ip = net.dst_addr(); - let dst_port = packet - .try_transport() - .and_then(|t| t.dst_port().map(NonZero::get)); - - if let Some(vpcd_table) = tablesr.tables_by_discriminant.get(&src_vpcd) { - if let Some((prefix, dst_vpcd_opt)) = vpcd_table.lookup(dst_ip, dst_port) { - // We found an entry in the destination VPC discriminant table, which means there - // are valid destinations for this packet, but the value may still be None if we're - // not able to tell the right destination VPC uniquely from this lookup. - if let Some(dst_vpcd) = dst_vpcd_opt { - debug!( - "{nfi}: Set packet dst_vpcd to {dst_vpcd} from src_vpcd:{src_vpcd}, prefix:{prefix}" - ); - packet.meta.dst_vpcd = Some(*dst_vpcd); - } else { - // If we can't tell, we'll need a look up in the flow table in a follow-up - // pipeline stage to see if we have a session that can help us determine the - // right destination VPC. So, we do NOT mark the packet as "done" here, we just - // pass it along with no dst_vpcd attached. - debug!( - "{nfi}: ambiguous dst_vpcd for {dst_ip} in src_vpcd {src_vpcd}: falling back to flow table lookup to see if a session exists" - ); - } - } else { - debug!( - "{nfi}: no dst_vpcd found for {dst_ip} in src_vpcd {src_vpcd}: marking packet as unroutable" - ); - packet.done(DoneReason::Unroutable); - } - } else { - debug!("{nfi}: no vpcd table found for src_vpcd {src_vpcd} (dst_addr={dst_ip})"); - packet.done(DoneReason::Unroutable); - } - } -} - -impl NetworkFunction for DstVpcdLookup { - #[allow(clippy::if_not_else)] - fn process<'a, Input: Iterator> + 'a>( - &'a mut self, - input: Input, - ) -> impl Iterator> + 'a { - input.filter_map(|mut packet| { - if let Some(tablesr) = &self.tablesr.enter() { - if !packet.is_done() { - // FIXME: ideally, we'd `enter` once for the whole batch. However, - // this requires boxing the closures, which may be worse than - // calling `enter` per packet? ... 
if not uglier - - self.process_packet(tablesr, &mut packet); - } - } else { - error!("{}: failed to read vpcd tables", self.name); - packet.done(DoneReason::InternalFailure); - } - packet.enforce() - }) - } -} - -#[cfg(test)] -mod test { - use super::{ - DstVpcdLookup, VpcDiscTablesWriter, VpcDiscriminantTable, VpcDiscriminantTableValue, - VpcDiscriminantTables, - }; - use lpm::prefix::Prefix; - use net::buffer::TestBuffer; - use net::headers::{Net, TryHeadersMut, TryIpMut}; - use net::ipv4::addr::UnicastIpv4Addr; - use net::ipv6::addr::UnicastIpv6Addr; - use net::packet::test_utils::{build_test_ipv4_packet, build_test_ipv6_packet}; - use net::packet::{DoneReason, Packet, VpcDiscriminant}; - use net::vxlan::Vni; - use pipeline::NetworkFunction; - use std::net::IpAddr; - - fn set_dst_addr(packet: &mut Packet, addr: IpAddr) { - let net = packet.headers_mut().try_ip_mut().unwrap(); - match net { - Net::Ipv4(ip) => { - ip.set_destination(UnicastIpv4Addr::try_from(addr).unwrap().into()); - } - Net::Ipv6(ip) => { - ip.set_destination(UnicastIpv6Addr::try_from(addr).unwrap().into()); - } - } - } - - fn create_test_packet(src_vni: Option, dst_addr: IpAddr) -> Packet { - let mut ret = match dst_addr { - IpAddr::V4(_) => build_test_ipv4_packet(100).unwrap(), - IpAddr::V6(_) => build_test_ipv6_packet(100).unwrap(), - }; - set_dst_addr(&mut ret, dst_addr); - ret.meta.src_vpcd = src_vni.map(VpcDiscriminant::VNI); - ret - } - - #[allow(clippy::too_many_lines)] - #[test] - fn test_dst_vni_lookup() { - //////////////////////////// - // Setup VNIs - let vni100 = Vni::new_checked(100).unwrap(); - let vni101 = Vni::new_checked(101).unwrap(); - let vni102 = Vni::new_checked(102).unwrap(); - let vni200 = Vni::new_checked(200).unwrap(); - let vni201 = Vni::new_checked(201).unwrap(); - let vni202 = Vni::new_checked(202).unwrap(); - - //////////////////////////// - // Setup VNI tables - - // VNI 100 - let mut vpcd_table_100 = VpcDiscriminantTable::new(); - let dst_vpcd_100_192_168_1_0_24 = VpcDiscriminant::VNI(vni101); - let dst_vpcd_100_192_168_0_0_16 = VpcDiscriminant::VNI(vni102); - vpcd_table_100.dst_vpcds.insert( - Prefix::from("192.168.1.0/24"), - VpcDiscriminantTableValue::NoPorts(Some(dst_vpcd_100_192_168_1_0_24)), - ); - vpcd_table_100.dst_vpcds.insert( - Prefix::from("192.168.0.0/16"), - VpcDiscriminantTableValue::NoPorts(Some(dst_vpcd_100_192_168_0_0_16)), - ); - vpcd_table_100.dst_vpcds.insert( - Prefix::from("::192.168.1.0/120"), - VpcDiscriminantTableValue::NoPorts(Some(dst_vpcd_100_192_168_1_0_24)), - ); - vpcd_table_100.dst_vpcds.insert( - Prefix::from("::192.168.0.0/112"), - VpcDiscriminantTableValue::NoPorts(Some(dst_vpcd_100_192_168_0_0_16)), - ); - - // VNI 200 - let mut vpcd_table_200 = VpcDiscriminantTable::new(); - let dst_vpcd_200_192_168_2_0_24 = VpcDiscriminant::VNI(vni201); - let dst_vpcd_200_192_168_0_0_16 = VpcDiscriminant::VNI(vni202); - vpcd_table_200.dst_vpcds.insert( - Prefix::from("192.168.2.0/24"), - VpcDiscriminantTableValue::NoPorts(Some(dst_vpcd_200_192_168_2_0_24)), - ); - vpcd_table_200.dst_vpcds.insert( - Prefix::from("192.168.2.0/16"), - VpcDiscriminantTableValue::NoPorts(Some(dst_vpcd_200_192_168_0_0_16)), - ); - vpcd_table_200.dst_vpcds.insert( - Prefix::from("::192.168.2.0/120"), - VpcDiscriminantTableValue::NoPorts(Some(dst_vpcd_200_192_168_2_0_24)), - ); - vpcd_table_200.dst_vpcds.insert( - Prefix::from("::192.168.0.0/112"), - VpcDiscriminantTableValue::NoPorts(Some(dst_vpcd_200_192_168_0_0_16)), - ); - - //////////////////////////// - // Setup VpcDiscriminant tables 
writer - let mut vpcd_tables = VpcDiscriminantTables::new(); - vpcd_tables - .tables_by_discriminant - .insert(VpcDiscriminant::VNI(vni100), vpcd_table_100); - vpcd_tables - .tables_by_discriminant - .insert(VpcDiscriminant::VNI(vni200), vpcd_table_200); - let mut vpcd_tables_w = VpcDiscTablesWriter::new(); - vpcd_tables_w.update_vpcd_tables(vpcd_tables); - - //////////////////////////// - // Setup DstVpcdLookup stage - let mut dst_vpcd_lookup = DstVpcdLookup::new("test", vpcd_tables_w.get_reader()); - - //////////////////////////// - // Test IPv4 packets - - let p_100_dst_addr_192_168_1_1 = - create_test_packet(Some(vni100), "192.168.1.1".parse().unwrap()); - let p_100_dst_addr_192_168_100_1 = - create_test_packet(Some(vni100), "192.168.100.1".parse().unwrap()); - let p_200_dst_addr_192_168_2_1 = - create_test_packet(Some(vni200), "192.168.2.1".parse().unwrap()); - let p_200_dst_addr_10_0_0_1 = create_test_packet(Some(vni100), "10.0.0.1".parse().unwrap()); - let p_none_dst_addr = create_test_packet( - Some(Vni::new_checked(1000).unwrap()), - "192.168.100.1".parse().unwrap(), - ); - - let packets_in = [ - p_100_dst_addr_192_168_1_1, - p_100_dst_addr_192_168_100_1, - p_200_dst_addr_192_168_2_1, - p_200_dst_addr_10_0_0_1, - p_none_dst_addr, - ]; - let packets = dst_vpcd_lookup - .process(packets_in.into_iter()) - .collect::>(); - - assert_eq!(packets.len(), 5); - assert_eq!(packets[0].meta.dst_vpcd, Some(dst_vpcd_100_192_168_1_0_24)); - assert!(!packets[0].is_done()); - assert_eq!(packets[1].meta.dst_vpcd, Some(dst_vpcd_100_192_168_0_0_16)); - assert!(!packets[1].is_done()); - assert_eq!(packets[2].meta.dst_vpcd, Some(dst_vpcd_200_192_168_2_0_24)); - assert!(!packets[2].is_done()); - assert_eq!(packets[3].meta.dst_vpcd, None); - assert_eq!(packets[3].get_done(), Some(DoneReason::Unroutable)); - assert_eq!(packets[4].meta.dst_vpcd, None); - assert_eq!(packets[4].get_done(), Some(DoneReason::Unroutable)); - - //////////////////////////// - // Test IPv6 packets - - let p_100_dst_addr_v6_192_168_1_1 = - create_test_packet(Some(vni100), "::192.168.1.1".parse().unwrap()); - let p_100_dst_addr_v6_192_168_100_1 = - create_test_packet(Some(vni100), "::192.168.100.1".parse().unwrap()); - let p_200_dst_addr_v6_192_168_2_1 = - create_test_packet(Some(vni200), "::192.168.2.1".parse().unwrap()); - let p_200_dst_addr_v6_10_0_0_1 = - create_test_packet(Some(vni100), "::10.0.0.1".parse().unwrap()); - - let packets_in = [ - p_100_dst_addr_v6_192_168_1_1, - p_100_dst_addr_v6_192_168_100_1, - p_200_dst_addr_v6_192_168_2_1, - p_200_dst_addr_v6_10_0_0_1, - ]; - let packets = dst_vpcd_lookup - .process(packets_in.into_iter()) - .collect::>(); - assert_eq!(packets.len(), 4); - assert_eq!(packets[0].meta.dst_vpcd, Some(dst_vpcd_100_192_168_1_0_24)); - assert!(!packets[0].is_done()); - assert_eq!(packets[1].meta.dst_vpcd, Some(dst_vpcd_100_192_168_0_0_16)); - assert!(!packets[1].is_done()); - assert_eq!(packets[2].meta.dst_vpcd, Some(dst_vpcd_200_192_168_2_0_24)); - assert!(!packets[2].is_done()); - assert_eq!(packets[3].meta.dst_vpcd, None); - assert_eq!(packets[3].get_done(), Some(DoneReason::Unroutable)); - } -} diff --git a/pkt-meta/src/dst_vpcd_lookup/setup.rs b/pkt-meta/src/dst_vpcd_lookup/setup.rs deleted file mode 100644 index a0ca05fab..000000000 --- a/pkt-meta/src/dst_vpcd_lookup/setup.rs +++ /dev/null @@ -1,437 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// Copyright Open Network Fabric Authors - -use crate::dst_vpcd_lookup::{DstVpcdLookupError, VpcDiscriminantTable, VpcDiscriminantTables}; -use 
config::ConfigError; -use config::external::overlay::Overlay; -use config::external::overlay::vpc::{Peering, VpcTable}; -use config::utils::{ConfigUtilError, collapse_prefixes_peering}; -use lpm::prefix::{IpRangeWithPorts, PrefixWithOptionalPorts}; -use net::packet::VpcDiscriminant; -use tracing::debug; - -fn insert_prefix_or_update_duplicates( - table: &mut VpcDiscriminantTable, - dst_vpcd: VpcDiscriminant, - prefix: &PrefixWithOptionalPorts, - colliding_prefixes: Vec, -) { - if colliding_prefixes.is_empty() { - // No collision, insert new entry - table.insert(*prefix, Some(dst_vpcd)); - } else { - // For a given source VPC discriminant, for a given, we have overlapping destination - // prefixes with different destination VPC discriminants, meaning we cannot uniquely - // determine the destination VPC discriminant for a packet based on source VPC and - // an address from the intersection of these prefixes. - // - // Set the value as None, to indicate that we cannot tell for now. - // - // This value may be changed later, based on stateful NAT allocations. - for p in colliding_prefixes { - table.insert(p, None); - } - debug!( - "Destination VPC discriminant table: duplicate prefix {} for destination discriminant {}", - prefix, dst_vpcd - ); - table.insert(*prefix, None); - } -} - -fn process_prefix( - table: &mut VpcDiscriminantTable, - dst_vpcd: VpcDiscriminant, - prefix: &PrefixWithOptionalPorts, -) { - // FIXME: Complexity isn't ideal here (O(n^2)), because we compare each prefix with all other - // prefixes in the trie. We also process colliding prefixes multiple times (if A and B collide, - // we update them as duplicates when processing both A and B). We might need to look for - // optimization if working with large numbers of prefixes. - let colliding_prefixes = table - .iter_for_prefix(&prefix.prefix()) - .filter(|(k, v)| k.overlaps(prefix) && v.is_none_or(|v| v != dst_vpcd)) - .map(|(k, _)| k) - .collect::>(); - insert_prefix_or_update_duplicates(table, dst_vpcd, prefix, colliding_prefixes); -} - -fn process_peering( - table: &mut VpcDiscriminantTable, - peering: &Peering, - vpc_table: &VpcTable, -) -> Result<(), DstVpcdLookupError> { - let new_peering = collapse_prefixes_peering(peering).map_err(|e| match e { - ConfigUtilError::SplitPrefixError(prefix) => { - DstVpcdLookupError::BuildError(prefix.to_string()) - } - })?; - - // Get VPC discrimminant for remote manifest - let remote_vpcd = VpcDiscriminant::VNI( - vpc_table - .get_vpc_by_vpcid(&new_peering.remote_id) - .unwrap_or_else(|| unreachable!()) - .vni, - ); - - new_peering.remote.exposes.iter().for_each(|expose| { - for prefix in expose.public_ips() { - process_prefix(table, remote_vpcd, prefix); - } - }); - Ok(()) -} - -/// Build the `dst_vni_lookup` configuration from an overlay. -/// -/// # Errors -/// -/// Returns an error if the configuration cannot be built. 
-pub fn build_dst_vni_lookup_configuration( - overlay: &Overlay, -) -> Result { - let mut vni_tables = VpcDiscriminantTables::new(); - for vpc in overlay.vpc_table.values() { - let mut table = VpcDiscriminantTable::new(); - for peering in &vpc.peerings { - process_peering(&mut table, peering, &overlay.vpc_table) - .map_err(|e| ConfigError::FailureApply(e.to_string()))?; - } - vni_tables - .tables_by_discriminant - .insert(VpcDiscriminant::VNI(vpc.vni), table); - } - Ok(vni_tables) -} - -#[cfg(test)] -mod tests { - use super::*; - use config::external::overlay::Overlay; - use config::external::overlay::vpc::{Peering, Vpc, VpcTable}; - use config::external::overlay::vpcpeering::{VpcExpose, VpcManifest, VpcPeeringTable}; - use lpm::prefix::Prefix; - use net::vxlan::Vni; - use std::net::IpAddr; - - fn dst_vpcd_lookup( - vpcd_tables: &'_ VpcDiscriminantTables, - vpcd: VpcDiscriminant, - ip: IpAddr, - ) -> Option<(PrefixWithOptionalPorts, &'_ Option)> { - vpcd_tables - .tables_by_discriminant - .get(&vpcd) - .unwrap() - .lookup(ip, None) - } - - fn addr(addr: &str) -> IpAddr { - addr.parse::().unwrap() - } - - fn build_overlay() -> (Vni, Vni, Overlay) { - // Build VpcExpose objects - // - // expose: - // - ips: - // - cidr: 1.1.0.0/16 - // - cidr: 1.2.0.0/16 # <- 1.2.3.4 will match here - // - not: 1.1.5.0/24 # to account for when computing the offset - // - not: 1.1.3.0/24 # to account for when computing the offset - // - not: 1.1.1.0/24 # to account for when computing the offset - // - not: 1.2.2.0/24 # to account for when computing the offset - // as: - // - cidr: 2.2.0.0/16 - // - cidr: 2.1.0.0/16 # <- corresp. target range, initially - // # (prefixes in BTreeSet are sorted) - // # offset for 2.1.255.4, before applying exlusions - // # final offset is for 2.2.0.4 after accounting for the one - // # relevant exclusion prefix - // - not: 2.1.8.0/24 # to account for when fetching the address in range - // - not: 2.2.10.0/24 - // - not: 2.2.1.0/24 # ignored, offset too low - // - not: 2.2.2.0/24 # ignored, offset too low - // - ips: - // - cidr: 3.0.0.0/16 - // as: - // - cidr: 4.0.0.0/16 - let expose1 = VpcExpose::empty() - .ip("1.1.0.0/16".into()) - .not("1.1.5.0/24".into()) - .not("1.1.3.0/24".into()) - .not("1.1.1.0/24".into()) - .ip("1.2.0.0/16".into()) - .not("1.2.2.0/24".into()) - .as_range("2.2.0.0/16".into()) - .not_as("2.1.8.0/24".into()) - .not_as("2.2.10.0/24".into()) - .not_as("2.2.1.0/24".into()) - .not_as("2.2.2.0/24".into()) - .as_range("2.1.0.0/16".into()); - let expose2 = VpcExpose::empty() - .ip("3.0.0.0/16".into()) - .as_range("4.0.0.0/16".into()); - - let manifest1 = VpcManifest { - name: "VPC-1".into(), - exposes: vec![expose1, expose2], - }; - - // expose: - // - ips: # Note the lack of "as" here - // - cidr: 8.0.0.0/17 - // - cidr: 9.0.0.0/17 - // - not: 8.0.0.0/24 - // - ips: - // - cidr: 10.0.0.0/16 # <- corresponding target range - // - not: 10.0.1.0/24 # to account for when fetching the address in range - // - not: 10.0.2.0/24 # to account for when fetching the address in range - // as: - // - cidr: 5.5.0.0/17 - // - cidr: 5.6.0.0/17 # <- 5.6.7.8 will match here - // - not: 5.6.0.0/24 # to account for when computing the offset - // - not: 5.6.8.0/24 - let expose3 = VpcExpose::empty() - .ip("8.0.0.0/17".into()) - .not("8.0.0.0/24".into()) - .ip("9.0.0.0/17".into()); - let expose4 = VpcExpose::empty() - .ip("10.0.0.0/16".into()) - .not("10.0.1.0/24".into()) - .not("10.0.2.0/24".into()) - .as_range("5.5.0.0/17".into()) - .as_range("5.6.0.0/17".into()) - 
.not_as("5.6.0.0/24".into()) - .not_as("5.6.8.0/24".into()); - - let manifest2 = VpcManifest { - name: "VPC-2".into(), - exposes: vec![expose3, expose4], - }; - - let peering1 = Peering { - name: "test_peering1".into(), - local: manifest1.clone(), - remote: manifest2.clone(), - remote_id: "12345".try_into().expect("Failed to create VPC ID"), - gwgroup: None, - adv_communities: vec![], - }; - let peering2 = Peering { - name: "test_peering2".into(), - local: manifest2, - remote: manifest1, - remote_id: "67890".try_into().expect("Failed to create VPC ID"), - gwgroup: None, - adv_communities: vec![], - }; - - let mut vpctable = VpcTable::new(); - - // vpc-1 - let vni1 = Vni::new_checked(100).unwrap(); - let mut vpc1 = Vpc::new("VPC-1", "67890", vni1.as_u32()).unwrap(); - vpc1.peerings.push(peering1.clone()); - vpctable.add(vpc1).unwrap(); - - // vpc-2 - let vni2 = Vni::new_checked(200).unwrap(); - let mut vpc2 = Vpc::new("VPC-2", "12345", vni2.as_u32()).unwrap(); - vpc2.peerings.push(peering2.clone()); - vpctable.add(vpc2).unwrap(); - - // Now test building the dst_vni_lookup configuration - let overlay = Overlay { - vpc_table: vpctable, - peering_table: VpcPeeringTable::new(), - }; - - (vni1, vni2, overlay) - } - - #[test] - fn test_setup() { - let (vni1, vni2, overlay) = build_overlay(); - let (vpcd1, vpcd2) = (VpcDiscriminant::VNI(vni1), VpcDiscriminant::VNI(vni2)); - let result = build_dst_vni_lookup_configuration(&overlay); - assert!( - result.is_ok(), - "Failed to build dst_vni_lookup configuration:\n{:#?}", - result.err() - ); - - let vpcd_tables = result.unwrap(); - assert_eq!(vpcd_tables.tables_by_discriminant.len(), 2); - println!( - "vni_tables: {:?}", - vpcd_tables - .tables_by_discriminant - .get(&vpcd1) - .unwrap() - .dst_vpcds - ); - - ////////////////////// - // table for vni 1 (uses second expose block, ensures we look at them all) - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd1, addr("5.5.5.1")), - Some((Prefix::from("5.5.0.0/17").into(), &Some(vpcd2))) - ); - - assert_eq!(dst_vpcd_lookup(&vpcd_tables, vpcd1, addr("5.6.0.1")), None); - - // Make sure dst VNI lookup for non-NAT stuff works - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd1, addr("8.0.1.1")), - Some((Prefix::from("8.0.1.0/24").into(), &Some(vpcd2))) - ); - - ////////////////////// - // table for vni 2 (uses first expose block, ensures we look at them all) - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd2, addr("2.2.0.1")), - Some((Prefix::from("2.2.0.0/24").into(), &Some(vpcd1))) - ); - - assert_eq!(dst_vpcd_lookup(&vpcd_tables, vpcd2, addr("2.2.2.1")), None); - } - - fn build_overlay_overlap() -> Overlay { - let mut manifest12 = VpcManifest::new("VPC-1"); - let mut manifest21 = VpcManifest::new("VPC-2"); - let mut manifest23 = VpcManifest::new("VPC-2"); - let mut manifest32 = VpcManifest::new("VPC-3"); - - manifest12 - .add_expose( - VpcExpose::empty() - .ip("1.0.0.0/24".into()) - .as_range("20.0.0.0/24".into()) - .as_range("21.0.0.0/16".into()) - .as_range("22.0.0.0/24".into()), - ) - .unwrap(); - - manifest21 - .add_expose(VpcExpose::empty().ip("2.0.0.0/24".into())) - .unwrap(); - - manifest23 - .add_expose(VpcExpose::empty().ip("3.0.0.0/24".into())) - .unwrap(); - - manifest32 - .add_expose( - VpcExpose::empty() - .ip("4.0.0.0/24".into()) - .as_range("20.0.0.0/24".into()) // Same as manifest12's 20.0.0.0/24 - .as_range("21.0.0.0/24".into()) // Overlap with manifest12's 21.0.0.0/16 - .as_range("25.0.0.0/24".into()), // No overlap with manifest12 - ) - .unwrap(); - - let peering12 = Peering { - name: 
"VPC-1--VPC-2".into(), - local: manifest12.clone(), - remote: manifest21.clone(), - remote_id: "VPC02".try_into().unwrap(), - gwgroup: None, - adv_communities: vec![], - }; - let peering21 = Peering { - name: "VPC-2--VPC-1".into(), - local: manifest21.clone(), - remote: manifest12.clone(), - remote_id: "VPC01".try_into().unwrap(), - gwgroup: None, - adv_communities: vec![], - }; - let peering23 = Peering { - name: "VPC-2--VPC-3".into(), - local: manifest23.clone(), - remote: manifest32.clone(), - remote_id: "VPC03".try_into().unwrap(), - gwgroup: None, - adv_communities: vec![], - }; - let peering32 = Peering { - name: "VPC-3--VPC-2".into(), - local: manifest32.clone(), - remote: manifest23.clone(), - remote_id: "VPC02".try_into().unwrap(), - gwgroup: None, - adv_communities: vec![], - }; - - let mut vpc_table = VpcTable::new(); - - let mut vpc1 = Vpc::new("VPC-1", "VPC01", 100).unwrap(); - vpc1.peerings.push(peering12); - vpc_table.add(vpc1).unwrap(); - - let mut vpc2 = Vpc::new("VPC-2", "VPC02", 200).unwrap(); - vpc2.peerings.push(peering21); - vpc2.peerings.push(peering23); - vpc_table.add(vpc2).unwrap(); - - let mut vpc3 = Vpc::new("VPC-3", "VPC03", 300).unwrap(); - vpc3.peerings.push(peering32); - vpc_table.add(vpc3).unwrap(); - - Overlay::new(vpc_table, VpcPeeringTable::new()) - } - - #[test] - fn test_setup_overlap() { - let overlay = build_overlay_overlap(); - let vpcd_tables = build_dst_vni_lookup_configuration(&overlay).unwrap(); - println!("vpcd_tables: {vpcd_tables:#?}"); - - assert_eq!(vpcd_tables.tables_by_discriminant.len(), 3); - - let (vpcd1, vpcd2, vpcd3) = ( - VpcDiscriminant::VNI(Vni::new_checked(100).unwrap()), - VpcDiscriminant::VNI(Vni::new_checked(200).unwrap()), - VpcDiscriminant::VNI(Vni::new_checked(300).unwrap()), - ); - - // Check lookup with vpc-1 or vpc-3 as source - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd1, addr("2.0.0.2")), - Some((Prefix::from("2.0.0.0/24").into(), &Some(vpcd2))) - ); - - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd3, addr("3.0.0.2")), - Some((Prefix::from("3.0.0.0/24").into(), &Some(vpcd2))) - ); - - // Check overlap: same prefixes - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd2, addr("20.0.0.2")), - Some((Prefix::from("20.0.0.0/24").into(), &None)) // No destination VPC discriminant - ); - - // Check overlap: different but overlapping prefixes - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd2, addr("21.0.0.2")), - Some((Prefix::from("21.0.0.0/24").into(), &None)) // No destination VPC discriminant - ); - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd2, addr("21.0.255.2")), - Some((Prefix::from("21.0.0.0/16").into(), &None)) // No destination VPC discriminant - ); - - // Check overlap: overlapping VpcExpose, but prefixes with no overlap - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd2, addr("22.0.0.2")), - Some((Prefix::from("22.0.0.0/24").into(), &Some(vpcd1))) - ); - assert_eq!( - dst_vpcd_lookup(&vpcd_tables, vpcd2, addr("25.0.0.2")), - Some((Prefix::from("25.0.0.0/24").into(), &Some(vpcd3))) - ); - } -} diff --git a/pkt-meta/src/lib.rs b/pkt-meta/src/lib.rs index 803c1e977..554201099 100644 --- a/pkt-meta/src/lib.rs +++ b/pkt-meta/src/lib.rs @@ -3,5 +3,4 @@ #![deny(clippy::all, clippy::pedantic)] -pub mod dst_vpcd_lookup; pub mod flow_table;