quiche/recovery/gcongestion/
bbr2.rs

1// Copyright (c) 2015 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Copyright (C) 2023, Cloudflare, Inc.
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12//     * Redistributions of source code must retain the above copyright notice,
13//       this list of conditions and the following disclaimer.
14//
15//     * Redistributions in binary form must reproduce the above copyright
16//       notice, this list of conditions and the following disclaimer in the
17//       documentation and/or other materials provided with the distribution.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31mod drain;
32mod mode;
33mod network_model;
34mod probe_bw;
35mod probe_rtt;
36mod startup;
37
38use std::time::Duration;
39use std::time::Instant;
40
41use network_model::BBRv2NetworkModel;
42
43use crate::recovery::gcongestion::Bandwidth;
44use crate::recovery::RecoveryStats;
45
46use self::mode::Mode;
47use self::mode::ModeImpl;
48
49use super::bbr::SendTimeState;
50use super::Acked;
51use super::BbrBwLoReductionStrategy;
52use super::BbrParams;
53use super::CongestionControl;
54use super::Lost;
55use super::RttStats;
56
57const MAX_MODE_CHANGES_PER_CONGESTION_EVENT: usize = 4;
58
59#[derive(Debug)]
60struct Params {
61    // STARTUP parameters.
62    /// The gain for CWND in startup.
63    startup_cwnd_gain: f32,
64
65    startup_pacing_gain: f32,
66
67    /// STARTUP or PROBE_UP are exited if the total bandwidth growth is less
68    /// than `full_bw_threshold` in the last `startup_full_bw_rounds`` round
69    /// trips.
70    full_bw_threshold: f32,
71
72    /// The number of rounds to stay in  STARTUP before exiting due to
73    /// bandwidth plateau.
74    startup_full_bw_rounds: usize,
75
76    /// Number of rounds to stay in STARTUP when there's a sufficient queue that
77    /// bytes_in_flight never drops below the target (1.75 * BDP).  0 indicates
78    /// the feature is disabled and we never exit due to queueing.
79    max_startup_queue_rounds: usize,
80
81    /// The minimum number of loss marking events to exit STARTUP.
82    startup_full_loss_count: usize,
83
84    /// DRAIN parameters.
85    drain_cwnd_gain: f32,
86
87    drain_pacing_gain: f32,
88
89    // PROBE_BW parameters.
90    /// Max number of rounds before probing for Reno-coexistence.
91    probe_bw_probe_max_rounds: usize,
92
93    enable_reno_coexistence: bool,
94
95    /// Multiplier to get Reno-style probe epoch duration as: k * BDP round
96    /// trips. If zero, disables Reno-style BDP-scaled coexistence mechanism.
97    probe_bw_probe_reno_gain: f32,
98
99    /// Minimum duration for BBR-native probes.
100    probe_bw_probe_base_duration: Duration,
101
102    /// The minimum number of loss marking events to exit the PROBE_UP phase.
103    probe_bw_full_loss_count: usize,
104
105    /// Pacing gains.
106    probe_bw_probe_up_pacing_gain: f32,
107    probe_bw_probe_down_pacing_gain: f32,
108    probe_bw_default_pacing_gain: f32,
109
110    probe_bw_cwnd_gain: f32,
111
112    // PROBE_UP parameters.
113    probe_up_ignore_inflight_hi: bool,
114
115    /// Number of rounds to stay in PROBE_UP when there's a sufficient queue
116    /// that bytes_in_flight never drops below the target.  0 indicates the
117    /// feature is disabled and we never exit due to queueing.
118    // TODO(vlad):
119    max_probe_up_queue_rounds: usize,
120
121    // PROBE_RTT parameters.
122    probe_rtt_inflight_target_bdp_fraction: f32,
123
124    /// The default period for entering PROBE_RTT
125    probe_rtt_period: Duration,
126
127    probe_rtt_duration: Duration,
128
129    // Parameters used by multiple modes.
130    /// The initial value of the max ack height filter's window length.
131    initial_max_ack_height_filter_window: usize,
132
133    /// The default fraction of unutilized headroom to try to leave in path
134    /// upon high loss.
135    inflight_hi_headroom: f32,
136
137    /// Estimate startup/bw probing has gone too far if loss rate exceeds this.
138    loss_threshold: f32,
139
140    /// A common factor for multiplicative decreases. Used for adjusting
141    /// `bandwidth_lo``, `inflight_lo`` and `inflight_hi`` upon losses.
142    beta: f32,
143
144    // Experimental flags.
145    add_ack_height_to_queueing_threshold: bool,
146
147    /// Don't run PROBE_RTT on the regular schedule
148    avoid_unnecessary_probe_rtt: bool,
149
150    /// When exiting STARTUP due to loss, set `inflight_hi`` to the max of bdp
151    /// and max bytes delivered in round.
152    limit_inflight_hi_by_max_delivered: bool,
153
154    startup_loss_exit_use_max_delivered_for_inflight_hi: bool,
155
156    /// Increase `inflight_hi`` based on delievered, not inflight.
157    use_bytes_delivered_for_inflight_hi: bool,
158
159    /// Set the pacing gain to 25% larger than the recent BW increase in
160    /// STARTUP.
161    decrease_startup_pacing_at_end_of_round: bool,
162
163    /// Avoid Overestimation in Bandwidth Sampler with ack aggregation
164    enable_overestimate_avoidance: bool,
165
166    /// If true, apply the fix to A0 point selection logic so the
167    /// implementation is consistent with the behavior of the
168    /// google/quiche implementation.
169    choose_a0_point_fix: bool,
170
171    bw_lo_mode: BwLoMode,
172
173    /// Determines whether app limited rounds with no bandwidth growth count
174    /// towards the rounds threshold to exit startup.
175    ignore_app_limited_for_no_bandwidth_growth: bool,
176}
177
178impl Params {
179    fn with_overrides(mut self, custom_bbr_settings: &BbrParams) -> Self {
180        macro_rules! apply_override {
181            ($field:ident) => {
182                if let Some(custom_value) = custom_bbr_settings.$field {
183                    self.$field = custom_value;
184                }
185            };
186        }
187
188        apply_override!(startup_cwnd_gain);
189        apply_override!(startup_pacing_gain);
190        apply_override!(full_bw_threshold);
191        apply_override!(startup_full_loss_count);
192        apply_override!(drain_cwnd_gain);
193        apply_override!(drain_pacing_gain);
194        apply_override!(enable_reno_coexistence);
195        apply_override!(enable_overestimate_avoidance);
196        apply_override!(choose_a0_point_fix);
197        apply_override!(probe_bw_probe_up_pacing_gain);
198        apply_override!(probe_bw_probe_down_pacing_gain);
199        apply_override!(probe_bw_cwnd_gain);
200        apply_override!(max_probe_up_queue_rounds);
201        apply_override!(loss_threshold);
202        apply_override!(use_bytes_delivered_for_inflight_hi);
203        apply_override!(decrease_startup_pacing_at_end_of_round);
204        apply_override!(ignore_app_limited_for_no_bandwidth_growth);
205
206        if let Some(custom_value) = custom_bbr_settings.bw_lo_reduction_strategy {
207            self.bw_lo_mode = custom_value.into();
208        }
209
210        self
211    }
212}
213
214const DEFAULT_PARAMS: Params = Params {
215    startup_cwnd_gain: 2.0,
216
217    startup_pacing_gain: 2.773,
218
219    full_bw_threshold: 1.25,
220
221    startup_full_bw_rounds: 3,
222
223    max_startup_queue_rounds: 0,
224
225    startup_full_loss_count: 8,
226
227    drain_cwnd_gain: 2.0,
228
229    drain_pacing_gain: 1.0 / 2.885,
230
231    probe_bw_probe_max_rounds: 63,
232
233    enable_reno_coexistence: true,
234
235    probe_bw_probe_reno_gain: 1.0,
236
237    probe_bw_probe_base_duration: Duration::from_millis(2000),
238
239    probe_bw_full_loss_count: 2,
240
241    probe_bw_probe_up_pacing_gain: 1.25,
242
243    probe_bw_probe_down_pacing_gain: 0.9, // BBRv3
244
245    probe_bw_default_pacing_gain: 1.0,
246
247    probe_bw_cwnd_gain: 2.25, // BBRv3
248
249    probe_up_ignore_inflight_hi: false,
250
251    max_probe_up_queue_rounds: 2,
252
253    probe_rtt_inflight_target_bdp_fraction: 0.5,
254
255    probe_rtt_period: Duration::from_millis(10000),
256
257    probe_rtt_duration: Duration::from_millis(200),
258
259    initial_max_ack_height_filter_window: 10,
260
261    inflight_hi_headroom: 0.15,
262
263    loss_threshold: 0.015,
264
265    beta: 0.3,
266
267    add_ack_height_to_queueing_threshold: false,
268
269    avoid_unnecessary_probe_rtt: true,
270
271    limit_inflight_hi_by_max_delivered: true,
272
273    startup_loss_exit_use_max_delivered_for_inflight_hi: true,
274
275    use_bytes_delivered_for_inflight_hi: true,
276
277    decrease_startup_pacing_at_end_of_round: true,
278
279    enable_overestimate_avoidance: true,
280
281    choose_a0_point_fix: false,
282
283    bw_lo_mode: BwLoMode::InflightReduction,
284
285    ignore_app_limited_for_no_bandwidth_growth: false,
286};
287
288#[derive(Debug, PartialEq)]
289enum BwLoMode {
290    Default,
291    MinRttReduction,
292    InflightReduction,
293    CwndReduction,
294}
295
296impl From<BbrBwLoReductionStrategy> for BwLoMode {
297    fn from(value: BbrBwLoReductionStrategy) -> Self {
298        match value {
299            BbrBwLoReductionStrategy::Default => BwLoMode::Default,
300            BbrBwLoReductionStrategy::MinRttReduction =>
301                BwLoMode::MinRttReduction,
302            BbrBwLoReductionStrategy::InflightReduction =>
303                BwLoMode::InflightReduction,
304            BbrBwLoReductionStrategy::CwndReduction => BwLoMode::CwndReduction,
305        }
306    }
307}
308
309#[derive(Debug)]
310struct Limits<T: Ord> {
311    lo: T,
312    hi: T,
313}
314
315impl<T: Ord + Clone + Copy> Limits<T> {
316    fn min(&self) -> T {
317        self.lo
318    }
319
320    fn apply_limits(&self, val: T) -> T {
321        val.max(self.lo).min(self.hi)
322    }
323}
324
325impl<T: Ord + Clone + Copy + From<u8>> Limits<T> {
326    pub(crate) fn no_greater_than(val: T) -> Self {
327        Self {
328            lo: T::from(0),
329            hi: val,
330        }
331    }
332}
333
334#[derive(Debug)]
335pub(crate) struct BBRv2 {
336    mode: Mode,
337    cwnd: usize,
338    mss: usize,
339
340    pacing_rate: Bandwidth,
341
342    cwnd_limits: Limits<usize>,
343
344    initial_cwnd: usize,
345
346    last_sample_is_app_limited: bool,
347    has_non_app_limited_sample: bool,
348    last_quiescence_start: Option<Instant>,
349    params: Params,
350}
351
352struct BBRv2CongestionEvent {
353    event_time: Instant,
354
355    /// The congestion window prior to the processing of the ack/loss events.
356    prior_cwnd: usize,
357    /// Total bytes inflight before the processing of the ack/loss events.
358    prior_bytes_in_flight: usize,
359
360    /// Total bytes inflight after the processing of the ack/loss events.
361    bytes_in_flight: usize,
362    /// Total bytes acked from acks in this event.
363    bytes_acked: usize,
364    /// Total bytes lost from losses in this event.
365    bytes_lost: usize,
366
367    /// Whether acked_packets indicates the end of a round trip.
368    end_of_round_trip: bool,
369    // When the event happened, whether the sender is probing for bandwidth.
370    is_probing_for_bandwidth: bool,
371
372    // Maximum bandwidth of all bandwidth samples from acked_packets.
373    // This sample may be app-limited, and will be None if there are no newly
374    // acknowledged inflight packets.
375    sample_max_bandwidth: Option<Bandwidth>,
376
377    /// Minimum rtt of all bandwidth samples from acked_packets.
378    /// None if acked_packets is empty.
379    sample_min_rtt: Option<Duration>,
380
381    /// The send state of the largest packet in acked_packets, unless it is
382    /// empty. If acked_packets is empty, it's the send state of the largest
383    /// packet in lost_packets.
384    last_packet_send_state: SendTimeState,
385}
386
387impl BBRv2CongestionEvent {
388    fn new(
389        event_time: Instant, prior_cwnd: usize, prior_bytes_in_flight: usize,
390        is_probing_for_bandwidth: bool,
391    ) -> Self {
392        BBRv2CongestionEvent {
393            event_time,
394            prior_cwnd,
395            prior_bytes_in_flight,
396            is_probing_for_bandwidth,
397            bytes_in_flight: 0,
398            bytes_acked: 0,
399            bytes_lost: 0,
400            end_of_round_trip: false,
401            last_packet_send_state: Default::default(),
402            sample_max_bandwidth: None,
403            sample_min_rtt: None,
404        }
405    }
406}
407
408impl BBRv2 {
409    pub fn new(
410        initial_congestion_window: usize, max_congestion_window: usize,
411        max_segment_size: usize, smoothed_rtt: Duration,
412        custom_bbr_params: Option<&BbrParams>,
413    ) -> Self {
414        let cwnd = initial_congestion_window * max_segment_size;
415        let params = if let Some(custom_bbr_settings) = custom_bbr_params {
416            DEFAULT_PARAMS.with_overrides(custom_bbr_settings)
417        } else {
418            DEFAULT_PARAMS
419        };
420
421        BBRv2 {
422            mode: Mode::startup(BBRv2NetworkModel::new(&params, smoothed_rtt)),
423            cwnd,
424            pacing_rate: Bandwidth::from_bytes_and_time_delta(cwnd, smoothed_rtt) *
425                2.885,
426            cwnd_limits: Limits {
427                lo: initial_congestion_window * max_segment_size,
428                hi: max_congestion_window * max_segment_size,
429            },
430            initial_cwnd: initial_congestion_window * max_segment_size,
431            last_sample_is_app_limited: false,
432            has_non_app_limited_sample: false,
433            last_quiescence_start: None,
434            mss: max_segment_size,
435            params,
436        }
437    }
438
439    fn on_exit_quiescence(&mut self, now: Instant) {
440        if let Some(last_quiescence_start) = self.last_quiescence_start.take() {
441            self.mode.do_on_exit_quiescence(
442                now,
443                last_quiescence_start,
444                &self.params,
445            )
446        }
447    }
448
449    fn get_target_congestion_window(&self, gain: f32) -> usize {
450        self.mode
451            .bdp(self.mode.bandwidth_estimate(), gain)
452            .max(self.cwnd_limits.min())
453    }
454
455    fn update_pacing_rate(&mut self, bytes_acked: usize) {
456        let bandwidth_estimate = match self.mode.bandwidth_estimate() {
457            e if e == Bandwidth::zero() => return,
458            e => e,
459        };
460
461        if self.mode.total_bytes_acked() == bytes_acked {
462            // After the first ACK, cwnd is still the initial congestion window.
463            self.pacing_rate = Bandwidth::from_bytes_and_time_delta(
464                self.cwnd,
465                self.mode.min_rtt(),
466            );
467            return;
468        }
469
470        let target_rate = bandwidth_estimate * self.mode.pacing_gain();
471        if self.mode.full_bandwidth_reached() {
472            self.pacing_rate = target_rate;
473            return;
474        }
475
476        if self.params.decrease_startup_pacing_at_end_of_round &&
477            self.mode.pacing_gain() < self.params.startup_pacing_gain
478        {
479            self.pacing_rate = target_rate;
480            return;
481        }
482
483        if self.params.bw_lo_mode != BwLoMode::Default &&
484            self.mode.loss_events_in_round() > 0
485        {
486            self.pacing_rate = target_rate;
487            return;
488        }
489
490        // By default, the pacing rate never decreases in STARTUP.
491        self.pacing_rate = self.pacing_rate.max(target_rate);
492    }
493
494    fn update_congestion_window(&mut self, bytes_acked: usize) {
495        let mut target_cwnd =
496            self.get_target_congestion_window(self.mode.cwnd_gain());
497
498        let prior_cwnd = self.cwnd;
499        if self.mode.full_bandwidth_reached() {
500            target_cwnd += self.mode.max_ack_height();
501            self.cwnd = target_cwnd.min(prior_cwnd + bytes_acked);
502        } else if prior_cwnd < target_cwnd || prior_cwnd < 2 * self.initial_cwnd {
503            self.cwnd = prior_cwnd + bytes_acked;
504        }
505
506        self.cwnd = self
507            .mode
508            .get_cwnd_limits(&self.params)
509            .apply_limits(self.cwnd);
510        self.cwnd = self.cwnd_limits.apply_limits(self.cwnd);
511    }
512
513    fn on_enter_quiescence(&mut self, time: Instant) {
514        self.last_quiescence_start = Some(time);
515    }
516
517    fn target_bytes_inflight(&self) -> usize {
518        let bdp = self.mode.bdp1(self.mode.bandwidth_estimate());
519        bdp.min(self.get_congestion_window())
520    }
521}
522
523impl CongestionControl for BBRv2 {
524    #[cfg(feature = "qlog")]
525    fn state_str(&self) -> &'static str {
526        self.mode.state_str()
527    }
528
529    fn get_congestion_window(&self) -> usize {
530        self.cwnd
531    }
532
533    fn get_congestion_window_in_packets(&self) -> usize {
534        self.cwnd / self.mss
535    }
536
537    fn can_send(&self, bytes_in_flight: usize) -> bool {
538        bytes_in_flight < self.get_congestion_window()
539    }
540
541    fn on_packet_sent(
542        &mut self, sent_time: Instant, bytes_in_flight: usize,
543        packet_number: u64, bytes: usize, is_retransmissible: bool,
544        rtt_stats: &RttStats,
545    ) {
546        if bytes_in_flight == 0 && self.params.avoid_unnecessary_probe_rtt {
547            self.on_exit_quiescence(sent_time);
548        }
549
550        self.mode.on_packet_sent(
551            sent_time,
552            bytes_in_flight,
553            packet_number,
554            bytes,
555            is_retransmissible,
556            rtt_stats,
557        );
558    }
559
560    fn on_congestion_event(
561        &mut self, _rtt_updated: bool, prior_in_flight: usize,
562        _bytes_in_flight: usize, event_time: Instant, acked_packets: &[Acked],
563        lost_packets: &[Lost], least_unacked: u64, _rtt_stats: &RttStats,
564        recovery_stats: &mut RecoveryStats,
565    ) {
566        let mut congestion_event = BBRv2CongestionEvent::new(
567            event_time,
568            self.cwnd,
569            prior_in_flight,
570            self.mode.is_probing_for_bandwidth(),
571        );
572
573        self.mode.on_congestion_event_start(
574            acked_packets,
575            lost_packets,
576            &mut congestion_event,
577            &self.params,
578        );
579
580        // Number of mode changes allowed for this congestion event.
581        let mut mode_changes_allowed = MAX_MODE_CHANGES_PER_CONGESTION_EVENT;
582        while mode_changes_allowed > 0 &&
583            self.mode.do_on_congestion_event(
584                prior_in_flight,
585                event_time,
586                acked_packets,
587                lost_packets,
588                &mut congestion_event,
589                self.target_bytes_inflight(),
590                &self.params,
591                recovery_stats,
592                self.get_congestion_window(),
593            )
594        {
595            mode_changes_allowed -= 1;
596        }
597
598        self.update_pacing_rate(congestion_event.bytes_acked);
599
600        self.update_congestion_window(congestion_event.bytes_acked);
601
602        self.mode
603            .on_congestion_event_finish(least_unacked, &congestion_event);
604        self.last_sample_is_app_limited =
605            congestion_event.last_packet_send_state.is_app_limited;
606        if !self.last_sample_is_app_limited {
607            self.has_non_app_limited_sample = true;
608        }
609        if congestion_event.bytes_in_flight == 0 &&
610            self.params.avoid_unnecessary_probe_rtt
611        {
612            self.on_enter_quiescence(event_time);
613        }
614    }
615
616    fn on_packet_neutered(&mut self, packet_number: u64) {
617        self.mode.on_packet_neutered(packet_number);
618    }
619
620    fn on_retransmission_timeout(&mut self, _packets_retransmitted: bool) {}
621
622    fn on_connection_migration(&mut self) {}
623
624    fn is_in_recovery(&self) -> bool {
625        // TODO(vlad): is this true?
626        self.last_quiescence_start.is_none()
627    }
628
629    fn is_cwnd_limited(&self, bytes_in_flight: usize) -> bool {
630        bytes_in_flight >= self.get_congestion_window()
631    }
632
633    fn pacing_rate(
634        &self, _bytes_in_flight: usize, _rtt_stats: &RttStats,
635    ) -> Bandwidth {
636        self.pacing_rate
637    }
638
639    fn bandwidth_estimate(&self, _rtt_stats: &RttStats) -> Bandwidth {
640        self.mode.bandwidth_estimate()
641    }
642
643    fn update_mss(&mut self, new_mss: usize) {
644        self.cwnd_limits.hi = (self.cwnd_limits.hi as u64 * new_mss as u64 /
645            self.mss as u64) as usize;
646        self.cwnd_limits.lo = (self.cwnd_limits.lo as u64 * new_mss as u64 /
647            self.mss as u64) as usize;
648        self.cwnd =
649            (self.cwnd as u64 * new_mss as u64 / self.mss as u64) as usize;
650        self.initial_cwnd = (self.initial_cwnd as u64 * new_mss as u64 /
651            self.mss as u64) as usize;
652        self.mss = new_mss;
653    }
654
655    fn on_app_limited(&mut self, bytes_in_flight: usize) {
656        if bytes_in_flight >= self.get_congestion_window() {
657            return;
658        }
659
660        self.mode.on_app_limited()
661    }
662
663    fn limit_cwnd(&mut self, max_cwnd: usize) {
664        self.cwnd_limits.hi = max_cwnd
665    }
666}