|
| 1 | +netfilter: bridge: confirm multicast packets before passing them up the stack |
| 2 | + |
| 3 | +jira LE-1907 |
| 4 | +cve CVE-2024-27415 |
| 5 | +Rebuild_History Non-Buildable kernel-5.14.0-427.33.1.el9_4 |
| 6 | +commit-author Florian Westphal <fw@strlen.de> |
| 7 | +commit 62e7151ae3eb465e0ab52a20c941ff33bb6332e9 |
| 8 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 9 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 10 | +ciq/ciq_backports/kernel-5.14.0-427.33.1.el9_4/62e7151a.failed |
| 11 | + |
| 12 | +conntrack nf_confirm logic cannot handle cloned skbs referencing |
| 13 | +the same nf_conn entry, which will happen for multicast (broadcast) |
| 14 | +frames on bridges. |
| 15 | + |
| 16 | + Example: |
| 17 | + macvlan0 |
| 18 | + | |
| 19 | + br0 |
| 20 | + / \ |
| 21 | + ethX ethY |
| 22 | + |
| 23 | + ethX (or Y) receives a L2 multicast or broadcast packet containing |
| 24 | + an IP packet, flow is not yet in conntrack table. |
| 25 | + |
| 26 | + 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. |
| 27 | + -> skb->_nfct now references a unconfirmed entry |
| 28 | + 2. skb is broad/mcast packet. bridge now passes clones out on each bridge |
| 29 | + interface. |
| 30 | + 3. skb gets passed up the stack. |
| 31 | + 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb |
| 32 | + and schedules a work queue to send them out on the lower devices. |
| 33 | + |
| 34 | + The clone skb->_nfct is not a copy, it is the same entry as the |
| 35 | + original skb. The macvlan rx handler then returns RX_HANDLER_PASS. |
| 36 | + 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. |
| 37 | + |
| 38 | +The Macvlan broadcast worker and normal confirm path will race. |
| 39 | + |
| 40 | +This race will not happen if step 2 already confirmed a clone. In that |
| 41 | +case later steps perform skb_clone() with skb->_nfct already confirmed (in |
| 42 | +hash table). This works fine. |
| 43 | + |
| 44 | +But such confirmation won't happen when eb/ip/nftables rules dropped the |
| 45 | +packets before they reached the nf_confirm step in postrouting. |
| 46 | + |
| 47 | +Pablo points out that nf_conntrack_bridge doesn't allow use of stateful |
| 48 | +nat, so we can safely discard the nf_conn entry and let inet call |
| 49 | +conntrack again. |
| 50 | + |
| 51 | +This doesn't work for bridge netfilter: skb could have a nat |
| 52 | +transformation. Also bridge nf prevents re-invocation of inet prerouting |
| 53 | +via 'sabotage_in' hook. |
| 54 | + |
| 55 | +Work around this problem by explicit confirmation of the entry at LOCAL_IN |
| 56 | +time, before upper layer has a chance to clone the unconfirmed entry. |
| 57 | + |
| 58 | +The downside is that this disables NAT and conntrack helpers. |
| 59 | + |
| 60 | +Alternative fix would be to add locking to all code parts that deal with |
| 61 | +unconfirmed packets, but even if that could be done in a sane way this |
| 62 | +opens up other problems, for example: |
| 63 | + |
| 64 | +-m physdev --physdev-out eth0 -j SNAT --snat-to 1.2.3.4 |
| 65 | +-m physdev --physdev-out eth1 -j SNAT --snat-to 1.2.3.5 |
| 66 | + |
| 67 | +For multicast case, only one of such conflicting mappings will be |
| 68 | +created, conntrack only handles 1:1 NAT mappings. |
| 69 | + |
| 70 | +Users should set create a setup that explicitly marks such traffic |
| 71 | +NOTRACK (conntrack bypass) to avoid this, but we cannot auto-bypass |
| 72 | +them, ruleset might have accept rules for untracked traffic already, |
| 73 | +so user-visible behaviour would change. |
| 74 | + |
| 75 | + Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org> |
| 76 | +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") |
| 77 | +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217777 |
| 78 | + Signed-off-by: Florian Westphal <fw@strlen.de> |
| 79 | + Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> |
| 80 | +(cherry picked from commit 62e7151ae3eb465e0ab52a20c941ff33bb6332e9) |
| 81 | + Signed-off-by: Jonathan Maple <jmaple@ciq.com> |
| 82 | + |
| 83 | +# Conflicts: |
| 84 | +# include/linux/netfilter.h |
| 85 | +# net/netfilter/nf_conntrack_core.c |
| 86 | +diff --cc include/linux/netfilter.h |
| 87 | +index adcda9cf6fe6,ce660d51549b..000000000000 |
| 88 | +--- a/include/linux/netfilter.h |
| 89 | ++++ b/include/linux/netfilter.h |
| 90 | +@@@ -475,6 -473,8 +475,11 @@@ struct nf_ct_hook |
| 91 | + bool (*get_tuple_skb)(struct nf_conntrack_tuple *, |
| 92 | + const struct sk_buff *); |
| 93 | + void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); |
| 94 | +++<<<<<<< HEAD |
| 95 | +++======= |
| 96 | ++ void (*set_closing)(struct nf_conntrack *nfct); |
| 97 | ++ int (*confirm)(struct sk_buff *skb); |
| 98 | +++>>>>>>> 62e7151ae3eb (netfilter: bridge: confirm multicast packets before passing them up the stack) |
| 99 | + }; |
| 100 | + extern const struct nf_ct_hook __rcu *nf_ct_hook; |
| 101 | + |
| 102 | +diff --cc net/netfilter/nf_conntrack_core.c |
| 103 | +index 2106625567af,5b876fa7f9af..000000000000 |
| 104 | +--- a/net/netfilter/nf_conntrack_core.c |
| 105 | ++++ b/net/netfilter/nf_conntrack_core.c |
| 106 | +@@@ -2790,6 -2755,8 +2790,11 @@@ static const struct nf_ct_hook nf_connt |
| 107 | + .destroy = nf_ct_destroy, |
| 108 | + .get_tuple_skb = nf_conntrack_get_tuple_skb, |
| 109 | + .attach = nf_conntrack_attach, |
| 110 | +++<<<<<<< HEAD |
| 111 | +++======= |
| 112 | ++ .set_closing = nf_conntrack_set_closing, |
| 113 | ++ .confirm = __nf_conntrack_confirm, |
| 114 | +++>>>>>>> 62e7151ae3eb (netfilter: bridge: confirm multicast packets before passing them up the stack) |
| 115 | + }; |
| 116 | + |
| 117 | + void nf_conntrack_init_end(void) |
| 118 | +* Unmerged path include/linux/netfilter.h |
| 119 | +diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c |
| 120 | +index 9421c5a097bd..0a0b5d540068 100644 |
| 121 | +--- a/net/bridge/br_netfilter_hooks.c |
| 122 | ++++ b/net/bridge/br_netfilter_hooks.c |
| 123 | +@@ -43,6 +43,10 @@ |
| 124 | + #include <linux/sysctl.h> |
| 125 | + #endif |
| 126 | + |
| 127 | ++#if IS_ENABLED(CONFIG_NF_CONNTRACK) |
| 128 | ++#include <net/netfilter/nf_conntrack_core.h> |
| 129 | ++#endif |
| 130 | ++ |
| 131 | + static unsigned int brnf_net_id __read_mostly; |
| 132 | + |
| 133 | + struct brnf_net { |
| 134 | +@@ -538,6 +542,90 @@ static unsigned int br_nf_pre_routing(void *priv, |
| 135 | + return NF_STOLEN; |
| 136 | + } |
| 137 | + |
| 138 | ++#if IS_ENABLED(CONFIG_NF_CONNTRACK) |
| 139 | ++/* conntracks' nf_confirm logic cannot handle cloned skbs referencing |
| 140 | ++ * the same nf_conn entry, which will happen for multicast (broadcast) |
| 141 | ++ * Frames on bridges. |
| 142 | ++ * |
| 143 | ++ * Example: |
| 144 | ++ * macvlan0 |
| 145 | ++ * br0 |
| 146 | ++ * ethX ethY |
| 147 | ++ * |
| 148 | ++ * ethX (or Y) receives multicast or broadcast packet containing |
| 149 | ++ * an IP packet, not yet in conntrack table. |
| 150 | ++ * |
| 151 | ++ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. |
| 152 | ++ * -> skb->_nfct now references a unconfirmed entry |
| 153 | ++ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge |
| 154 | ++ * interface. |
| 155 | ++ * 3. skb gets passed up the stack. |
| 156 | ++ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb |
| 157 | ++ * and schedules a work queue to send them out on the lower devices. |
| 158 | ++ * |
| 159 | ++ * The clone skb->_nfct is not a copy, it is the same entry as the |
| 160 | ++ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. |
| 161 | ++ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. |
| 162 | ++ * |
| 163 | ++ * The Macvlan broadcast worker and normal confirm path will race. |
| 164 | ++ * |
| 165 | ++ * This race will not happen if step 2 already confirmed a clone. In that |
| 166 | ++ * case later steps perform skb_clone() with skb->_nfct already confirmed (in |
| 167 | ++ * hash table). This works fine. |
| 168 | ++ * |
| 169 | ++ * But such confirmation won't happen when eb/ip/nftables rules dropped the |
| 170 | ++ * packets before they reached the nf_confirm step in postrouting. |
| 171 | ++ * |
| 172 | ++ * Work around this problem by explicit confirmation of the entry at |
| 173 | ++ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed |
| 174 | ++ * entry. |
| 175 | ++ * |
| 176 | ++ */ |
| 177 | ++static unsigned int br_nf_local_in(void *priv, |
| 178 | ++ struct sk_buff *skb, |
| 179 | ++ const struct nf_hook_state *state) |
| 180 | ++{ |
| 181 | ++ struct nf_conntrack *nfct = skb_nfct(skb); |
| 182 | ++ const struct nf_ct_hook *ct_hook; |
| 183 | ++ struct nf_conn *ct; |
| 184 | ++ int ret; |
| 185 | ++ |
| 186 | ++ if (!nfct || skb->pkt_type == PACKET_HOST) |
| 187 | ++ return NF_ACCEPT; |
| 188 | ++ |
| 189 | ++ ct = container_of(nfct, struct nf_conn, ct_general); |
| 190 | ++ if (likely(nf_ct_is_confirmed(ct))) |
| 191 | ++ return NF_ACCEPT; |
| 192 | ++ |
| 193 | ++ WARN_ON_ONCE(skb_shared(skb)); |
| 194 | ++ WARN_ON_ONCE(refcount_read(&nfct->use) != 1); |
| 195 | ++ |
| 196 | ++ /* We can't call nf_confirm here, it would create a dependency |
| 197 | ++ * on nf_conntrack module. |
| 198 | ++ */ |
| 199 | ++ ct_hook = rcu_dereference(nf_ct_hook); |
| 200 | ++ if (!ct_hook) { |
| 201 | ++ skb->_nfct = 0ul; |
| 202 | ++ nf_conntrack_put(nfct); |
| 203 | ++ return NF_ACCEPT; |
| 204 | ++ } |
| 205 | ++ |
| 206 | ++ nf_bridge_pull_encap_header(skb); |
| 207 | ++ ret = ct_hook->confirm(skb); |
| 208 | ++ switch (ret & NF_VERDICT_MASK) { |
| 209 | ++ case NF_STOLEN: |
| 210 | ++ return NF_STOLEN; |
| 211 | ++ default: |
| 212 | ++ nf_bridge_push_encap_header(skb); |
| 213 | ++ break; |
| 214 | ++ } |
| 215 | ++ |
| 216 | ++ ct = container_of(nfct, struct nf_conn, ct_general); |
| 217 | ++ WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); |
| 218 | ++ |
| 219 | ++ return ret; |
| 220 | ++} |
| 221 | ++#endif |
| 222 | + |
| 223 | + /* PF_BRIDGE/FORWARD *************************************************/ |
| 224 | + static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) |
| 225 | +@@ -936,6 +1024,14 @@ static const struct nf_hook_ops br_nf_ops[] = { |
| 226 | + .hooknum = NF_BR_PRE_ROUTING, |
| 227 | + .priority = NF_BR_PRI_BRNF, |
| 228 | + }, |
| 229 | ++#if IS_ENABLED(CONFIG_NF_CONNTRACK) |
| 230 | ++ { |
| 231 | ++ .hook = br_nf_local_in, |
| 232 | ++ .pf = NFPROTO_BRIDGE, |
| 233 | ++ .hooknum = NF_BR_LOCAL_IN, |
| 234 | ++ .priority = NF_BR_PRI_LAST, |
| 235 | ++ }, |
| 236 | ++#endif |
| 237 | + { |
| 238 | + .hook = br_nf_forward_ip, |
| 239 | + .pf = NFPROTO_BRIDGE, |
| 240 | +diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c |
| 241 | +index 56efda595b81..ecc9cc481767 100644 |
| 242 | +--- a/net/bridge/netfilter/nf_conntrack_bridge.c |
| 243 | ++++ b/net/bridge/netfilter/nf_conntrack_bridge.c |
| 244 | +@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, |
| 245 | + return nf_conntrack_in(skb, &bridge_state); |
| 246 | + } |
| 247 | + |
| 248 | ++static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, |
| 249 | ++ const struct nf_hook_state *state) |
| 250 | ++{ |
| 251 | ++ enum ip_conntrack_info ctinfo; |
| 252 | ++ struct nf_conn *ct; |
| 253 | ++ |
| 254 | ++ if (skb->pkt_type == PACKET_HOST) |
| 255 | ++ return NF_ACCEPT; |
| 256 | ++ |
| 257 | ++ /* nf_conntrack_confirm() cannot handle concurrent clones, |
| 258 | ++ * this happens for broad/multicast frames with e.g. macvlan on top |
| 259 | ++ * of the bridge device. |
| 260 | ++ */ |
| 261 | ++ ct = nf_ct_get(skb, &ctinfo); |
| 262 | ++ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) |
| 263 | ++ return NF_ACCEPT; |
| 264 | ++ |
| 265 | ++ /* let inet prerouting call conntrack again */ |
| 266 | ++ skb->_nfct = 0; |
| 267 | ++ nf_ct_put(ct); |
| 268 | ++ |
| 269 | ++ return NF_ACCEPT; |
| 270 | ++} |
| 271 | ++ |
| 272 | + static void nf_ct_bridge_frag_save(struct sk_buff *skb, |
| 273 | + struct nf_bridge_frag_data *data) |
| 274 | + { |
| 275 | +@@ -415,6 +439,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { |
| 276 | + .hooknum = NF_BR_PRE_ROUTING, |
| 277 | + .priority = NF_IP_PRI_CONNTRACK, |
| 278 | + }, |
| 279 | ++ { |
| 280 | ++ .hook = nf_ct_bridge_in, |
| 281 | ++ .pf = NFPROTO_BRIDGE, |
| 282 | ++ .hooknum = NF_BR_LOCAL_IN, |
| 283 | ++ .priority = NF_IP_PRI_CONNTRACK_CONFIRM, |
| 284 | ++ }, |
| 285 | + { |
| 286 | + .hook = nf_ct_bridge_post, |
| 287 | + .pf = NFPROTO_BRIDGE, |
| 288 | +* Unmerged path net/netfilter/nf_conntrack_core.c |
0 commit comments