在2.6.32內核中,基于丟包的擁塞算法基本都需要考慮delay_ack所帶來的影響。例如每個ack都確認兩個數據包,如果被擁塞算法當做一個ack確認一個數據包,那窗口的增加速率必然下降。在4.9內核版本之前,由于擁塞窗口接口函數原型中并不攜帶本次ack確認多少個數據包的信息,當擁塞窗口進行調整時,確實需要考慮到delay_ack并進行適當的調整。主要的過程是使用類似計算srtt_us的方式,估算一個ack平均確認多少個數據包,在擁塞窗口調整時,對調整幅度進行微調。
主要數據結構
????struct tcp_congestion_ops {
????struct list_head list;
????unsigned long flags;
????/* initialize private data (optional) */
????void (*init)(struct sock *sk);
????/* cleanup private data? (optional) */
????void (*release)(struct sock *sk);
????/* return slow start threshold (required) */
????u32 (*ssthresh)(struct sock *sk);
????/* lower bound for congestion window (optional) */
????u32 (*min_cwnd)(const struct sock *sk);
????/* do new cwnd calculation (required) */
????void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);//擁塞窗口調(diào)整接口
????/* call before changing ca_state (optional) */
????...
????void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us);//收到ack確認(rèn)數(shù)據(jù)包調(diào)用接口
????/* get info for inet_diag (optional) */
????void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
????char name[TCP_CA_NAME_MAX];
????struct module *owner;
????};
需要注意的是4.9內核中擁塞窗口調整函數原型已經變為:
????void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
不同之處在于最后一個變量,由網絡中存在的數據包個數in_flight變為了本次ack確認的數據包個數acked。既然已經將該ack確認的數據包個數傳遞進來,因此不需要再進行delay_ack的比例估算。
以2.6.32內核中的bic算法為例,擁塞窗口調整函數bictcp_cong_avoid調用bictcp_update函數,在其中使用計算出的平均delay_ack比例:
????static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
????{
????struct tcp_sock *tp = tcp_sk(sk);
????struct bictcp *ca = inet_csk_ca(sk);
????if (!tcp_is_cwnd_limited(sk, in_flight))
????????return;
????if (tp->snd_cwnd <= tp->snd_ssthresh)
????????tcp_slow_start(tp);
????else {
????????bictcp_update(ca, tp->snd_cwnd);
????????tcp_cong_avoid_ai(tp, ca->cnt);
????}
????}
bictcp_update函數的結尾部分,對控制擁塞窗口調整快慢的變量ca->cnt進行按比例調整。
????static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
????{
????....
????ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack; //對(duì)調(diào)整擁塞窗口的幅度進(jìn)行按比例調(diào)整
????if (ca->cnt == 0) /* cannot be zero */
????ca->cnt = 1;
????}
而估算delay_ack比例的部分在(*pkts_acked)接口函數中進行:
????#define ACK_RATIO_SHIFT 4
????/* Track delayed acknowledgment ratio using sliding window
????* ratio = (15*ratio + sample) / 16
????*/
????static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt)
????{
????const struct inet_connection_sock *icsk = inet_csk(sk);
????if (icsk->icsk_ca_state == TCP_CA_Open) {
????????struct bictcp *ca = inet_csk_ca(sk);
????????cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
????????ca->delayed_ack += cnt;
????}
????}
代碼比較簡單就不分析了,但是存在一個bug:假設初始狀態ca->delayed_ack = 32,每次ack都不是delay_ack,都只確認一個數據包,ca->delayed_ack的最小值卻停留在31,不會繼續減小。原因就是先計算ca->delayed_ack/16,由于整型除法是向下取整,因此當ca->delayed_ack = 31時,31/16 = 1,cnt - 1 = 0,ca->delayed_ack不再變化,無法收斂到每個ack確認一個數據包所對應的16。
cubic版本中對delay_ack的計算也類似,也存在ca->delayed_ack最小值為31的問題,不同的是增加了最大值限制。
????#define ACK_RATIO_SHIFT 4
????#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT)
????static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
????{
????????const struct inet_connection_sock *icsk = inet_csk(sk);
????????const struct tcp_sock *tp = tcp_sk(sk);
????????struct bictcp *ca = inet_csk_ca(sk);
????????u32 delay;
????????if (icsk->icsk_ca_state == TCP_CA_Open) {
????????????u32 ratio = ca->delayed_ack;
????????????ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
????????????ratio += cnt;
????????????ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);//最大比例不能超過32
????????}
????????......
????}