From 1e253edaa798160e6cd111c77c5b572b2c7c9085 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 17 Jun 2019 19:46:08 +0100 Subject: [PATCH] Add TCP DoS fixes --- debian/changelog | 4 + .../all/tcp-add-tcp_min_snd_mss-sysctl.patch | 123 ++++++++++++++ ...e-tcp_min_snd_mss-in-tcp_mtu_probing.patch | 36 ++++ ...cp-limit-payload-size-of-sacked-skbs.patch | 154 ++++++++++++++++++ ...ment-should-apply-sane-memory-limits.patch | 70 ++++++++ debian/patches/series | 4 + 6 files changed, 391 insertions(+) create mode 100644 debian/patches/bugfix/all/tcp-add-tcp_min_snd_mss-sysctl.patch create mode 100644 debian/patches/bugfix/all/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch create mode 100644 debian/patches/bugfix/all/tcp-limit-payload-size-of-sacked-skbs.patch create mode 100644 debian/patches/bugfix/all/tcp-tcp_fragment-should-apply-sane-memory-limits.patch diff --git a/debian/changelog b/debian/changelog index 9ddfe6a3d..7308e63b9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -15,6 +15,10 @@ linux (4.19.37-4) UNRELEASED; urgency=medium * mm/mincore.c: make mincore() more conservative (CVE-2019-5489) * mwifiex: Fix heap overflow in mwifiex_uap_parse_tail_ies() (CVE-2019-10126) + * tcp: limit payload size of sacked skbs (CVE-2019-11477) + * tcp: tcp_fragment() should apply sane memory limits (CVE-2019-11478) + * tcp: add tcp_min_snd_mss sysctl (CVE-2019-11479) + * tcp: enforce tcp_min_snd_mss in tcp_mtu_probing() [ Romain Perier ] * [rt] Update to 4.19.37-rt20 diff --git a/debian/patches/bugfix/all/tcp-add-tcp_min_snd_mss-sysctl.patch b/debian/patches/bugfix/all/tcp-add-tcp_min_snd_mss-sysctl.patch new file mode 100644 index 000000000..18985b044 --- /dev/null +++ b/debian/patches/bugfix/all/tcp-add-tcp_min_snd_mss-sysctl.patch @@ -0,0 +1,123 @@ +From: Eric Dumazet +Date: Mon, 17 Jun 2019 10:03:53 -0700 +Subject: [PATCH net 3/4] tcp: add tcp_min_snd_mss sysctl +Origin: https://patchwork.ozlabs.org/patch/1117157/ + +Some TCP peers announce a very small 
MSS option in their SYN and/or +SYN/ACK messages. + +This forces the stack to send packets with a very high network/cpu +overhead. + +Linux has enforced a minimal value of 48. Since this value includes +the size of TCP options, and that the options can consume up to 40 +bytes, this means that each segment can include only 8 bytes of payload. + +In some cases, it can be useful to increase the minimal value +to a saner value. + +We still let the default to 48 (TCP_MIN_SND_MSS), for compatibility +reasons. + +Note that TCP_MAXSEG socket option enforces a minimal value +of (TCP_MIN_MSS). David Miller increased this minimal value +in commit c39508d6f118 ("tcp: Make TCP_MAXSEG minimum more correct.") +from 64 to 88. + +We might in the future merge TCP_MIN_SND_MSS and TCP_MIN_MSS. + +CVE-2019-11479 -- tcp mss hardcoded to 48 + +Signed-off-by: Eric Dumazet +Suggested-by: Jonathan Looney +Acked-by: Neal Cardwell +Cc: Yuchung Cheng +Cc: Tyler Hicks +Cc: Bruce Curtis +Cc: Jonathan Lemon +Acked-by: Jonathan Lemon +Acked-by: Tyler Hicks +--- + Documentation/networking/ip-sysctl.txt | 8 ++++++++ + include/net/netns/ipv4.h | 1 + + net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++ + net/ipv4/tcp_ipv4.c | 1 + + net/ipv4/tcp_output.c | 3 +-- + 5 files changed, 22 insertions(+), 2 deletions(-) + +--- a/Documentation/networking/ip-sysctl.txt ++++ b/Documentation/networking/ip-sysctl.txt +@@ -250,6 +250,14 @@ tcp_base_mss - INTEGER + Path MTU discovery (MTU probing). If MTU probing is enabled, + this is the initial MSS used by the connection. + ++tcp_min_snd_mss - INTEGER ++ TCP SYN and SYNACK messages usually advertise an ADVMSS option, ++ as described in RFC 1122 and RFC 6691. ++ If this ADVMSS option is smaller than tcp_min_snd_mss, ++ it is silently capped to tcp_min_snd_mss. ++ ++ Default : 48 (at least 8 bytes of payload per segment) ++ + tcp_congestion_control - STRING + Set the congestion control algorithm to be used for new + connections. 
The algorithm "reno" is always available, but +--- a/include/net/netns/ipv4.h ++++ b/include/net/netns/ipv4.h +@@ -113,6 +113,7 @@ struct netns_ipv4 { + #endif + int sysctl_tcp_mtu_probing; + int sysctl_tcp_base_mss; ++ int sysctl_tcp_min_snd_mss; + int sysctl_tcp_probe_threshold; + u32 sysctl_tcp_probe_interval; + +--- a/net/ipv4/sysctl_net_ipv4.c ++++ b/net/ipv4/sysctl_net_ipv4.c +@@ -39,6 +39,8 @@ static int ip_local_port_range_min[] = { + static int ip_local_port_range_max[] = { 65535, 65535 }; + static int tcp_adv_win_scale_min = -31; + static int tcp_adv_win_scale_max = 31; ++static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; ++static int tcp_min_snd_mss_max = 65535; + static int ip_privileged_port_min; + static int ip_privileged_port_max = 65535; + static int ip_ttl_min = 1; +@@ -737,6 +739,15 @@ static struct ctl_table ipv4_net_table[] + .proc_handler = proc_dointvec, + }, + { ++ .procname = "tcp_min_snd_mss", ++ .data = &init_net.ipv4.sysctl_tcp_min_snd_mss, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = &tcp_min_snd_mss_min, ++ .extra2 = &tcp_min_snd_mss_max, ++ }, ++ { + .procname = "tcp_probe_threshold", + .data = &init_net.ipv4.sysctl_tcp_probe_threshold, + .maxlen = sizeof(int), +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -2527,6 +2527,7 @@ static int __net_init tcp_sk_init(struct + net->ipv4.sysctl_tcp_ecn_fallback = 1; + + net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; ++ net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; + net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; + net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1462,8 +1462,7 @@ static inline int __tcp_mtu_to_mss(struc + mss_now -= icsk->icsk_ext_hdr_len; + + /* Then reserve room for full set of TCP options and 8 bytes of data */ +- if (mss_now < TCP_MIN_SND_MSS) +- mss_now = TCP_MIN_SND_MSS; ++ mss_now = max(mss_now, 
sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); + return mss_now; + } + diff --git a/debian/patches/bugfix/all/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch b/debian/patches/bugfix/all/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch new file mode 100644 index 000000000..17104536e --- /dev/null +++ b/debian/patches/bugfix/all/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch @@ -0,0 +1,36 @@ +From: Eric Dumazet +Date: Mon, 17 Jun 2019 10:03:54 -0700 +Subject: [PATCH net 4/4] tcp: enforce tcp_min_snd_mss in tcp_mtu_probing() +Origin: https://patchwork.ozlabs.org/patch/1117158/ + +If mtu probing is enabled tcp_mtu_probing() could very well end up +with a too small MSS. + +Use the new sysctl tcp_min_snd_mss to make sure MSS search +is performed in an acceptable range. + +CVE-2019-11479 -- tcp mss hardcoded to 48 + +Signed-off-by: Eric Dumazet +Reported-by: Jonathan Lemon +Cc: Jonathan Looney +Acked-by: Neal Cardwell +Cc: Yuchung Cheng +Cc: Tyler Hicks +Cc: Bruce Curtis +Acked-by: Jonathan Lemon +Acked-by: Tyler Hicks +--- + net/ipv4/tcp_timer.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -166,6 +166,7 @@ static void tcp_mtu_probing(struct inet_ + mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; + mss = min(net->ipv4.sysctl_tcp_base_mss, mss); + mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); ++ mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); + } + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/debian/patches/bugfix/all/tcp-limit-payload-size-of-sacked-skbs.patch b/debian/patches/bugfix/all/tcp-limit-payload-size-of-sacked-skbs.patch new file mode 100644 index 000000000..fc5f6af1d --- /dev/null +++ b/debian/patches/bugfix/all/tcp-limit-payload-size-of-sacked-skbs.patch @@ -0,0 +1,154 @@ +From: Eric Dumazet +Date: Mon, 17 Jun 2019 10:03:51 -0700 +Subject: [PATCH net 1/4] tcp: limit payload size of sacked skbs +Origin: 
https://patchwork.ozlabs.org/patch/1117155/ + +Jonathan Looney reported that TCP can trigger the following crash +in tcp_shifted_skb() : + + BUG_ON(tcp_skb_pcount(skb) < pcount); + +This can happen if the remote peer has advertised the smallest +MSS that linux TCP accepts : 48 + +An skb can hold 17 fragments, and each fragment can hold 32KB +on x86, or 64KB on PowerPC. + +This means that the 16bit width of TCP_SKB_CB(skb)->tcp_gso_segs +can overflow. + +Note that tcp_sendmsg() builds skbs with less than 64KB +of payload, so this problem needs SACK to be enabled. +SACK blocks allow TCP to coalesce multiple skbs in the retransmit +queue, thus filling the 17 fragments to maximal capacity. + +CVE-2019-11477 -- u16 overflow of TCP_SKB_CB(skb)->tcp_gso_segs + +Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing") +Signed-off-by: Eric Dumazet +Reported-by: Jonathan Looney +Acked-by: Neal Cardwell +Reviewed-by: Tyler Hicks +Cc: Yuchung Cheng +Cc: Bruce Curtis +Cc: Jonathan Lemon +Acked-by: Jonathan Lemon +--- + include/linux/tcp.h | 4 ++++ + include/net/tcp.h | 2 ++ + net/ipv4/tcp.c | 1 + + net/ipv4/tcp_input.c | 26 ++++++++++++++++++++------ + net/ipv4/tcp_output.c | 6 +++--- + 5 files changed, 30 insertions(+), 9 deletions(-) + +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -485,4 +485,8 @@ static inline u16 tcp_mss_clamp(const st + + return (user_mss && user_mss < mss) ? user_mss : mss; + } ++ ++int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, ++ int shiftlen); ++ + #endif /* _LINUX_TCP_H */ +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -55,6 +55,8 @@ void tcp_time_wait(struct sock *sk, int + + #define MAX_TCP_HEADER (128 + MAX_HEADER) + #define MAX_TCP_OPTION_SPACE 40 ++#define TCP_MIN_SND_MSS 48 ++#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) + + /* + * Never offer a window over 32767 without using window scaling.
Some +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3829,6 +3829,7 @@ void __init tcp_init(void) + unsigned long limit; + unsigned int i; + ++ BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE); + BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > + FIELD_SIZEOF(struct sk_buff, cb)); + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1315,7 +1315,7 @@ static bool tcp_shifted_skb(struct sock + TCP_SKB_CB(skb)->seq += shifted; + + tcp_skb_pcount_add(prev, pcount); +- BUG_ON(tcp_skb_pcount(skb) < pcount); ++ WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); + tcp_skb_pcount_add(skb, -pcount); + + /* When we're adding to gso_segs == 1, gso_size will be zero, +@@ -1381,6 +1381,21 @@ static int skb_can_shift(const struct sk + return !skb_headlen(skb) && skb_is_nonlinear(skb); + } + ++int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, ++ int pcount, int shiftlen) ++{ ++ /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE) ++ * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need ++ * to make sure not storing more than 65535 * 8 bytes per skb, ++ * even if current MSS is bigger. ++ */ ++ if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE)) ++ return 0; ++ if (unlikely(tcp_skb_pcount(to) + pcount > 65535)) ++ return 0; ++ return skb_shift(to, from, shiftlen); ++} ++ + /* Try collapsing SACK blocks spanning across multiple skbs to a single + * skb. 
+ */ +@@ -1486,7 +1501,7 @@ static struct sk_buff *tcp_shift_skb_dat + if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) + goto fallback; + +- if (!skb_shift(prev, skb, len)) ++ if (!tcp_skb_shift(prev, skb, pcount, len)) + goto fallback; + if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack)) + goto out; +@@ -1504,11 +1519,10 @@ static struct sk_buff *tcp_shift_skb_dat + goto out; + + len = skb->len; +- if (skb_shift(prev, skb, len)) { +- pcount += tcp_skb_pcount(skb); +- tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb), ++ pcount = tcp_skb_pcount(skb); ++ if (tcp_skb_shift(prev, skb, pcount, len)) ++ tcp_shifted_skb(sk, prev, skb, state, pcount, + len, mss, 0); +- } + + out: + return prev; +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1457,8 +1457,8 @@ static inline int __tcp_mtu_to_mss(struc + mss_now -= icsk->icsk_ext_hdr_len; + + /* Then reserve room for full set of TCP options and 8 bytes of data */ +- if (mss_now < 48) +- mss_now = 48; ++ if (mss_now < TCP_MIN_SND_MSS) ++ mss_now = TCP_MIN_SND_MSS; + return mss_now; + } + +@@ -2727,7 +2727,7 @@ static bool tcp_collapse_retrans(struct + if (next_skb_size <= skb_availroom(skb)) + skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), + next_skb_size); +- else if (!skb_shift(skb, next_skb, next_skb_size)) ++ else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size)) + return false; + } + tcp_highest_sack_replace(sk, next_skb, skb); diff --git a/debian/patches/bugfix/all/tcp-tcp_fragment-should-apply-sane-memory-limits.patch b/debian/patches/bugfix/all/tcp-tcp_fragment-should-apply-sane-memory-limits.patch new file mode 100644 index 000000000..e7f46130e --- /dev/null +++ b/debian/patches/bugfix/all/tcp-tcp_fragment-should-apply-sane-memory-limits.patch @@ -0,0 +1,70 @@ +From: Eric Dumazet +Date: Mon, 17 Jun 2019 10:03:52 -0700 +Subject: [PATCH net 2/4] tcp: tcp_fragment() should apply sane memory limits +Origin: https://patchwork.ozlabs.org/patch/1117156/ + 
+Jonathan Looney reported that a malicious peer can force a sender +to fragment its retransmit queue into tiny skbs, inflating memory +usage and/or overflowing 32bit counters. + +TCP allows an application to queue up to sk_sndbuf bytes, +so we need to give some allowance for non malicious splitting +of retransmit queue. + +A new SNMP counter is added to monitor how many times TCP +did not allow to split an skb if the allowance was exceeded. + +Note that this counter might increase in the case applications +use SO_SNDBUF socket option to lower sk_sndbuf. + +CVE-2019-11478 : tcp_fragment, prevent fragmenting a packet when the + socket is already using more than half the allowed space + +Signed-off-by: Eric Dumazet +Reported-by: Jonathan Looney +Acked-by: Neal Cardwell +Acked-by: Yuchung Cheng +Reviewed-by: Tyler Hicks +Cc: Bruce Curtis +Cc: Jonathan Lemon +Acked-by: Jonathan Lemon +--- + include/uapi/linux/snmp.h | 1 + + net/ipv4/proc.c | 1 + + net/ipv4/tcp_output.c | 5 +++++ + 3 files changed, 7 insertions(+) + +--- a/include/uapi/linux/snmp.h ++++ b/include/uapi/linux/snmp.h +@@ -282,6 +282,7 @@ enum + LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */ + LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */ + LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */ ++ LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */ + __LINUX_MIB_MAX + }; + +--- a/net/ipv4/proc.c ++++ b/net/ipv4/proc.c +@@ -290,6 +290,7 @@ static const struct snmp_mib snmp4_net_l + SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), + SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), + SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), ++ SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG), + SNMP_MIB_SENTINEL + }; + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1299,6 +1299,11 @@ int tcp_fragment(struct sock *sk, enum t + if (nsize < 0) + nsize = 0; + ++ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) { ++ NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPWQUEUETOOBIG); ++ return -ENOMEM; ++ } ++ + if (skb_unclone(skb, gfp)) + return -ENOMEM; + diff --git a/debian/patches/series b/debian/patches/series index ba723b226..1374828b3 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -222,6 +222,10 @@ bugfix/all/mwifiex-abort-at-too-short-bss-descriptor-element.patch bugfix/all/mwifiex-don-t-abort-on-small-spec-compliant-vendor-ies.patch bugfix/all/mm-mincore.c-make-mincore-more-conservative.patch bugfix/all/mwifiex-fix-heap-overflow-in-mwifiex_uap_parse_tail_.patch +bugfix/all/tcp-limit-payload-size-of-sacked-skbs.patch +bugfix/all/tcp-tcp_fragment-should-apply-sane-memory-limits.patch +bugfix/all/tcp-add-tcp_min_snd_mss-sysctl.patch +bugfix/all/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch # Fix exported symbol versions bugfix/all/module-disable-matching-missing-version-crc.patch