quiche/recovery/gcongestion/
bbr2.rs

1// Copyright (c) 2015 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Copyright (C) 2023, Cloudflare, Inc.
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12//     * Redistributions of source code must retain the above copyright notice,
13//       this list of conditions and the following disclaimer.
14//
15//     * Redistributions in binary form must reproduce the above copyright
16//       notice, this list of conditions and the following disclaimer in the
17//       documentation and/or other materials provided with the distribution.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31mod drain;
32mod mode;
33mod network_model;
34mod probe_bw;
35mod probe_rtt;
36mod startup;
37
38use std::time::Duration;
39use std::time::Instant;
40
41use network_model::BBRv2NetworkModel;
42
43use self::mode::Mode;
44use self::mode::ModeImpl;
45
46use super::bandwidth::Bandwidth;
47use super::bbr::SendTimeState;
48use super::Acked;
49use super::CongestionControl;
50use super::Lost;
51use super::RttStats;
52
53const MAX_MODE_CHANGES_PER_CONGESTION_EVENT: usize = 4;
54
55#[derive(PartialEq)]
56#[allow(dead_code)]
57enum BwLoMode {
58    Default,
59    MinRttReduction,
60    InflightReduction,
61    CwndReduction,
62}
63
64struct Params {
65    // STARTUP parameters.
66    /// The gain for CWND in startup.
67    startup_cwnd_gain: f32,
68
69    startup_pacing_gain: f32,
70
71    /// STARTUP or PROBE_UP are exited if the total bandwidth growth is less
72    /// than `full_bw_threshold` in the last `startup_full_bw_rounds`` round
73    /// trips.
74    full_bw_threshold: f32,
75
76    startup_full_bw_rounds: usize,
77
78    /// Number of rounds to stay in STARTUP when there's a sufficient queue that
79    /// bytes_in_flight never drops below the target (1.75 * BDP).  0 indicates
80    /// the feature is disabled and we never exit due to queueing.
81    max_startup_queue_rounds: usize,
82
83    /// The minimum number of loss marking events to exit STARTUP.
84    startup_full_loss_count: usize,
85
86    /// DRAIN parameters.
87    drain_cwnd_gain: f32,
88
89    drain_pacing_gain: f32,
90
91    // PROBE_BW parameters.
92    /// Max number of rounds before probing for Reno-coexistence.
93    probe_bw_probe_max_rounds: usize,
94
95    enable_reno_coexistence: bool,
96
97    /// Multiplier to get Reno-style probe epoch duration as: k * BDP round
98    /// trips. If zero, disables Reno-style BDP-scaled coexistence mechanism.
99    probe_bw_probe_reno_gain: f32,
100
101    /// Minimum duration for BBR-native probes.
102    probe_bw_probe_base_duration: Duration,
103
104    /// The minimum number of loss marking events to exit the PROBE_UP phase.
105    probe_bw_full_loss_count: usize,
106
107    /// Pacing gains.
108    probe_bw_probe_up_pacing_gain: f32,
109    probe_bw_probe_down_pacing_gain: f32,
110    probe_bw_default_pacing_gain: f32,
111
112    probe_bw_cwnd_gain: f32,
113
114    // PROBE_UP parameters.
115    probe_up_ignore_inflight_hi: bool,
116
117    /// Number of rounds to stay in PROBE_UP when there's a sufficient queue
118    /// that bytes_in_flight never drops below the target.  0 indicates the
119    /// feature is disabled and we never exit due to queueing.
120    // TODO(vlad):
121    max_probe_up_queue_rounds: usize,
122
123    // PROBE_RTT parameters.
124    probe_rtt_inflight_target_bdp_fraction: f32,
125
126    /// The default period for entering PROBE_RTT
127    probe_rtt_period: Duration,
128
129    probe_rtt_duration: Duration,
130
131    // Parameters used by multiple modes.
132    /// The initial value of the max ack height filter's window length.
133    initial_max_ack_height_filter_window: usize,
134
135    /// The default fraction of unutilized headroom to try to leave in path
136    /// upon high loss.
137    inflight_hi_headroom: f32,
138
139    /// Estimate startup/bw probing has gone too far if loss rate exceeds this.
140    loss_threshold: f32,
141
142    /// A common factor for multiplicative decreases. Used for adjusting
143    /// `bandwidth_lo``, `inflight_lo`` and `inflight_hi`` upon losses.
144    beta: f32,
145
146    // Experimental flags.
147    add_ack_height_to_queueing_threshold: bool,
148
149    /// Don't run PROBE_RTT on the regular schedule
150    avoid_unnecessary_probe_rtt: bool,
151
152    /// When exiting STARTUP due to loss, set `inflight_hi`` to the max of bdp
153    /// and max bytes delivered in round.
154    limit_inflight_hi_by_max_delivered: bool,
155
156    startup_loss_exit_use_max_delivered_for_inflight_hi: bool,
157
158    /// Increase `inflight_hi`` based on delievered, not inflight.
159    use_bytes_delivered_for_inflight_hi: bool,
160
161    /// Set the pacing gain to 25% larger than the recent BW increase in
162    /// STARTUP.
163    decrease_startup_pacing_at_end_of_round: bool,
164
165    /// Avoid Overestimation in Bandwidth Sampler with ack aggregation
166    overestimate_avoidance: bool,
167
168    bw_lo_mode: BwLoMode,
169}
170
171const PARAMS: Params = Params {
172    startup_cwnd_gain: 2.0,
173
174    startup_pacing_gain: 2.773,
175
176    full_bw_threshold: 1.25,
177
178    startup_full_bw_rounds: 3,
179
180    max_startup_queue_rounds: 0,
181
182    startup_full_loss_count: 8,
183
184    drain_cwnd_gain: 2.0,
185
186    drain_pacing_gain: 1.0 / 2.885,
187
188    probe_bw_probe_max_rounds: 63,
189
190    enable_reno_coexistence: true,
191
192    probe_bw_probe_reno_gain: 1.0,
193
194    probe_bw_probe_base_duration: Duration::from_millis(2000),
195
196    probe_bw_full_loss_count: 2,
197
198    probe_bw_probe_up_pacing_gain: 1.25,
199
200    probe_bw_probe_down_pacing_gain: 0.9, // BBRv3
201
202    probe_bw_default_pacing_gain: 1.0,
203
204    probe_bw_cwnd_gain: 2.25, // BBRv3
205
206    probe_up_ignore_inflight_hi: false,
207
208    max_probe_up_queue_rounds: 2,
209
210    probe_rtt_inflight_target_bdp_fraction: 0.5,
211
212    probe_rtt_period: Duration::from_millis(10000),
213
214    probe_rtt_duration: Duration::from_millis(200),
215
216    initial_max_ack_height_filter_window: 10,
217
218    inflight_hi_headroom: 0.15,
219
220    loss_threshold: 0.015,
221
222    beta: 0.3,
223
224    add_ack_height_to_queueing_threshold: false,
225
226    avoid_unnecessary_probe_rtt: true,
227
228    limit_inflight_hi_by_max_delivered: true,
229
230    startup_loss_exit_use_max_delivered_for_inflight_hi: true,
231
232    use_bytes_delivered_for_inflight_hi: true,
233
234    decrease_startup_pacing_at_end_of_round: true,
235
236    overestimate_avoidance: true,
237
238    bw_lo_mode: BwLoMode::InflightReduction,
239};
240
241#[derive(Debug)]
242struct Limits<T: Ord> {
243    lo: T,
244    hi: T,
245}
246
247impl<T: Ord + Clone + Copy> Limits<T> {
248    fn min(&self) -> T {
249        self.lo
250    }
251
252    fn apply_limits(&self, val: T) -> T {
253        val.max(self.lo).min(self.hi)
254    }
255}
256
257impl<T: Ord + Clone + Copy + From<u8>> Limits<T> {
258    pub(crate) fn no_greater_than(val: T) -> Self {
259        Self {
260            lo: T::from(0),
261            hi: val,
262        }
263    }
264}
265
266#[derive(Debug)]
267pub(crate) struct BBRv2 {
268    mode: Mode,
269    cwnd: usize,
270    mss: usize,
271
272    pacing_rate: Bandwidth,
273
274    cwnd_limits: Limits<usize>,
275
276    initial_cwnd: usize,
277
278    last_sample_is_app_limited: bool,
279    has_non_app_limited_sample: bool,
280    last_quiescence_start: Option<Instant>,
281}
282
283struct BBRv2CongestionEvent {
284    event_time: Instant,
285
286    /// The congestion window prior to the processing of the ack/loss events.
287    prior_cwnd: usize,
288    /// Total bytes inflight before the processing of the ack/loss events.
289    prior_bytes_in_flight: usize,
290
291    /// Total bytes inflight after the processing of the ack/loss events.
292    bytes_in_flight: usize,
293    /// Total bytes acked from acks in this event.
294    bytes_acked: usize,
295    /// Total bytes lost from losses in this event.
296    bytes_lost: usize,
297
298    /// Whether acked_packets indicates the end of a round trip.
299    end_of_round_trip: bool,
300    // When the event happened, whether the sender is probing for bandwidth.
301    is_probing_for_bandwidth: bool,
302
303    // Maximum bandwidth of all bandwidth samples from acked_packets.
304    // This sample may be app-limited, and will be None if there are no newly
305    // acknowledged inflight packets.
306    sample_max_bandwidth: Option<Bandwidth>,
307
308    /// Minimum rtt of all bandwidth samples from acked_packets.
309    /// None if acked_packets is empty.
310    sample_min_rtt: Option<Duration>,
311
312    /// The send state of the largest packet in acked_packets, unless it is
313    /// empty. If acked_packets is empty, it's the send state of the largest
314    /// packet in lost_packets.
315    last_packet_send_state: SendTimeState,
316}
317
318impl BBRv2CongestionEvent {
319    fn new(
320        event_time: Instant, prior_cwnd: usize, prior_bytes_in_flight: usize,
321        is_probing_for_bandwidth: bool,
322    ) -> Self {
323        BBRv2CongestionEvent {
324            event_time,
325            prior_cwnd,
326            prior_bytes_in_flight,
327            is_probing_for_bandwidth,
328            bytes_in_flight: 0,
329            bytes_acked: 0,
330            bytes_lost: 0,
331            end_of_round_trip: false,
332            last_packet_send_state: Default::default(),
333            sample_max_bandwidth: None,
334            sample_min_rtt: None,
335        }
336    }
337}
338
339impl BBRv2 {
340    pub fn new(
341        initial_congestion_window: usize, max_congestion_window: usize,
342        max_segment_size: usize, smoothed_rtt: Duration,
343    ) -> Self {
344        let cwnd = initial_congestion_window * max_segment_size;
345        BBRv2 {
346            mode: Mode::startup(BBRv2NetworkModel::new(
347                PARAMS.startup_cwnd_gain,
348                PARAMS.startup_pacing_gain,
349                PARAMS.overestimate_avoidance,
350            )),
351            cwnd,
352            pacing_rate: Bandwidth::from_bytes_and_time_delta(cwnd, smoothed_rtt) *
353                2.885,
354            cwnd_limits: Limits {
355                lo: initial_congestion_window * max_segment_size,
356                hi: max_congestion_window * max_segment_size,
357            },
358            initial_cwnd: initial_congestion_window * max_segment_size,
359            last_sample_is_app_limited: false,
360            has_non_app_limited_sample: false,
361            last_quiescence_start: None,
362            mss: max_segment_size,
363        }
364    }
365
366    fn on_exit_quiescence(&mut self, now: Instant) {
367        if let Some(last_quiescence_start) = self.last_quiescence_start.take() {
368            self.mode.do_on_exit_quiescence(now, last_quiescence_start)
369        }
370    }
371
372    fn get_target_congestion_window(&self, gain: f32) -> usize {
373        self.mode
374            .bdp(self.mode.bandwidth_estimate(), gain)
375            .max(self.cwnd_limits.min())
376    }
377
378    fn update_pacing_rate(&mut self, bytes_acked: usize) {
379        let bandwidth_estimate = match self.mode.bandwidth_estimate() {
380            e if e == Bandwidth::zero() => return,
381            e => e,
382        };
383
384        if self.mode.total_bytes_acked() == bytes_acked {
385            // After the first ACK, cwnd is still the initial congestion window.
386            self.pacing_rate = Bandwidth::from_bytes_and_time_delta(
387                self.cwnd,
388                self.mode.min_rtt(),
389            );
390            return;
391        }
392
393        let target_rate = bandwidth_estimate * self.mode.pacing_gain();
394        if self.mode.full_bandwidth_reached() {
395            self.pacing_rate = target_rate;
396            return;
397        }
398
399        if PARAMS.decrease_startup_pacing_at_end_of_round &&
400            self.mode.pacing_gain() < PARAMS.startup_pacing_gain
401        {
402            self.pacing_rate = target_rate;
403            return;
404        }
405
406        if PARAMS.bw_lo_mode != BwLoMode::Default &&
407            self.mode.loss_events_in_round() > 0
408        {
409            self.pacing_rate = target_rate;
410            return;
411        }
412
413        // By default, the pacing rate never decreases in STARTUP.
414        self.pacing_rate = self.pacing_rate.max(target_rate);
415    }
416
417    fn update_congestion_window(&mut self, bytes_acked: usize) {
418        let mut target_cwnd =
419            self.get_target_congestion_window(self.mode.cwnd_gain());
420
421        let prior_cwnd = self.cwnd;
422        if self.mode.full_bandwidth_reached() {
423            target_cwnd += self.mode.max_ack_height();
424            self.cwnd = target_cwnd.min(prior_cwnd + bytes_acked);
425        } else if prior_cwnd < target_cwnd || prior_cwnd < 2 * self.initial_cwnd {
426            self.cwnd = prior_cwnd + bytes_acked;
427        }
428
429        self.cwnd = self.mode.get_cwnd_limits().apply_limits(self.cwnd);
430        self.cwnd = self.cwnd_limits.apply_limits(self.cwnd);
431    }
432
433    fn on_enter_quiescence(&mut self, time: Instant) {
434        self.last_quiescence_start = Some(time);
435    }
436
437    fn target_bytes_inflight(&self) -> usize {
438        let bdp = self.mode.bdp1(self.mode.bandwidth_estimate());
439        bdp.min(self.get_congestion_window())
440    }
441}
442
443impl CongestionControl for BBRv2 {
444    fn get_congestion_window(&self) -> usize {
445        self.cwnd
446    }
447
448    fn get_congestion_window_in_packets(&self) -> usize {
449        self.cwnd / self.mss
450    }
451
452    fn can_send(&self, bytes_in_flight: usize) -> bool {
453        bytes_in_flight < self.get_congestion_window()
454    }
455
456    fn on_packet_sent(
457        &mut self, sent_time: std::time::Instant, bytes_in_flight: usize,
458        packet_number: u64, bytes: usize, is_retransmissible: bool,
459        rtt_stats: &RttStats,
460    ) {
461        if bytes_in_flight == 0 && PARAMS.avoid_unnecessary_probe_rtt {
462            self.on_exit_quiescence(sent_time);
463        }
464
465        self.mode.on_packet_sent(
466            sent_time,
467            bytes_in_flight,
468            packet_number,
469            bytes,
470            is_retransmissible,
471            rtt_stats,
472        );
473    }
474
475    fn on_congestion_event(
476        &mut self, _rtt_updated: bool, prior_in_flight: usize,
477        _bytes_in_flight: usize, event_time: Instant, acked_packets: &[Acked],
478        lost_packets: &[Lost], least_unacked: u64, _rtt_stats: &RttStats,
479    ) {
480        let mut congestion_event = BBRv2CongestionEvent::new(
481            event_time,
482            self.cwnd,
483            prior_in_flight,
484            self.mode.is_probing_for_bandwidth(),
485        );
486
487        self.mode.on_congestion_event_start(
488            acked_packets,
489            lost_packets,
490            &mut congestion_event,
491        );
492
493        // Number of mode changes allowed for this congestion event.
494        let mut mode_changes_allowed = MAX_MODE_CHANGES_PER_CONGESTION_EVENT;
495        while mode_changes_allowed > 0 &&
496            self.mode.do_on_congestion_event(
497                prior_in_flight,
498                event_time,
499                acked_packets,
500                lost_packets,
501                &mut congestion_event,
502                self.target_bytes_inflight(),
503            )
504        {
505            mode_changes_allowed -= 1;
506        }
507
508        self.update_pacing_rate(congestion_event.bytes_acked);
509
510        self.update_congestion_window(congestion_event.bytes_acked);
511
512        self.mode
513            .on_congestion_event_finish(least_unacked, &congestion_event);
514        self.last_sample_is_app_limited =
515            congestion_event.last_packet_send_state.is_app_limited;
516        if !self.last_sample_is_app_limited {
517            self.has_non_app_limited_sample = true;
518        }
519        if congestion_event.bytes_in_flight == 0 &&
520            PARAMS.avoid_unnecessary_probe_rtt
521        {
522            self.on_enter_quiescence(event_time);
523        }
524    }
525
526    fn on_packet_neutered(&mut self, packet_number: u64) {
527        self.mode.on_packet_neutered(packet_number);
528    }
529
530    fn on_retransmission_timeout(&mut self, _packets_retransmitted: bool) {}
531
532    fn on_connection_migration(&mut self) {}
533
534    fn is_in_recovery(&self) -> bool {
535        // TODO(vlad): is this true?
536        self.last_quiescence_start.is_none()
537    }
538
539    fn is_cwnd_limited(&self, bytes_in_flight: usize) -> bool {
540        bytes_in_flight >= self.get_congestion_window()
541    }
542
543    fn pacing_rate(
544        &self, _bytes_in_flight: usize, _rtt_stats: &super::RttStats,
545    ) -> Bandwidth {
546        self.pacing_rate
547    }
548
549    fn bandwidth_estimate(&self, _rtt_stats: &super::RttStats) -> Bandwidth {
550        self.mode.bandwidth_estimate()
551    }
552
553    fn update_mss(&mut self, new_mss: usize) {
554        self.cwnd_limits.hi = self.cwnd_limits.hi * new_mss / self.mss;
555        self.cwnd_limits.lo = self.cwnd_limits.lo * new_mss / self.mss;
556        self.cwnd = self.cwnd * new_mss / self.mss;
557        self.initial_cwnd = self.initial_cwnd * new_mss / self.mss;
558        self.mss = new_mss;
559    }
560
561    fn on_app_limited(&mut self, bytes_in_flight: usize) {
562        if bytes_in_flight >= self.get_congestion_window() {
563            return;
564        }
565
566        self.mode.on_app_limited()
567    }
568
569    fn limit_cwnd(&mut self, max_cwnd: usize) {
570        self.cwnd_limits.hi = max_cwnd
571    }
572}