Skip to content

Commit 7c106d7

Browse files
hswong3iDavid S. Miller
authored and committed
[TCP]: TCP Low Priority congestion control
TCP Low Priority is a distributed algorithm whose goal is to utilize only the excess network bandwidth as compared to the ``fair share`` of bandwidth as targeted by TCP. Available from: http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf Original Author: Aleksandar Kuzmanovic <[email protected]> See http://www-ece.rice.edu/networks/TCP-LP/ for their implementation. As of 2.6.13, Linux supports pluggable congestion control algorithms. Due to the limitation of the API, we make the following changes from the original TCP-LP implementation: o We use newReno in most core CA handling. Only add some checking within cong_avoid. o Error correcting in remote HZ, therefore remote HZ is continuously checked and updated. o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since OWD has a similar meaning to RTT. Also correct the buggy formula. o Handle reaction for Early Congestion Indication (ECI) within pkts_acked, as mentioned within pseudo code. o OWD is handled in relative format, where local time stamp will be in tcp_time_stamp format. Port from 2.4.19 to 2.6.16 as module by: Wong Hoi Sing Edison <[email protected]> Hung Hing Lun <[email protected]> Signed-off-by: Wong Hoi Sing Edison <[email protected]> Signed-off-by: Stephen Hemminger <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 2f45c34 commit 7c106d7

File tree

3 files changed

+349
-0
lines changed

3 files changed

+349
-0
lines changed

net/ipv4/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,16 @@ config TCP_CONG_SCALABLE
550550
properties, though is known to have fairness issues.
551551
See http://www-lce.eng.cam.ac.uk/~ctk21/scalable/
552552

553+
config TCP_CONG_LP
554+
tristate "TCP Low Priority"
555+
depends on EXPERIMENTAL
556+
default n
557+
---help---
558+
TCP Low Priority (TCP-LP), a distributed algorithm whose goal is
559+
to utilize only the excess network bandwidth as compared to the
560+
``fair share`` of bandwidth as targeted by TCP.
561+
See http://www-ece.rice.edu/networks/TCP-LP/
562+
553563
endmenu
554564

555565
config TCP_CONG_BIC

net/ipv4/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
4444
obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o
4545
obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
4646
obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
47+
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
4748

4849
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
4950
xfrm4_output.o

net/ipv4/tcp_lp.c

Lines changed: 338 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,338 @@
1+
/*
2+
* TCP Low Priority (TCP-LP)
3+
*
4+
* TCP Low Priority is a distributed algorithm whose goal is to utilize only
5+
* the excess network bandwidth as compared to the ``fair share`` of
6+
* bandwidth as targeted by TCP. Available from:
7+
* http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
8+
*
9+
* Original Author:
10+
* Aleksandar Kuzmanovic <[email protected]>
11+
*
12+
* See http://www-ece.rice.edu/networks/TCP-LP/ for their implementation.
13+
* As of 2.6.13, Linux supports pluggable congestion control algorithms.
14+
* Due to the limitation of the API, we take the following changes from
15+
* the original TCP-LP implementation:
16+
* o We use newReno in most core CA handling. Only add some checking
17+
* within cong_avoid.
18+
* o Error correcting in remote HZ, therefore remote HZ will be kept
19+
* on checking and updating.
20+
* o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since
21+
* OWD has a similar meaning to RTT. Also correct the buggy formula.
22+
* o Handle reaction for Early Congestion Indication (ECI) within
23+
* pkts_acked, as mentioned within pseudo code.
24+
* o OWD is handled in relative format, where local time stamp will be in
25+
* tcp_time_stamp format.
26+
*
27+
* Port from 2.4.19 to 2.6.16 as module by:
28+
* Wong Hoi Sing Edison <[email protected]>
29+
* Hung Hing Lun <[email protected]>
30+
*
31+
* Version: $Id: tcp_lp.c,v 1.22 2006-05-02 18:18:19 hswong3i Exp $
32+
*/
33+
34+
#include <linux/config.h>
35+
#include <linux/module.h>
36+
#include <net/tcp.h>
37+
38+
/* Resolution used when scaling timestamp ticks into OWD units:
 * ticks are multiplied by LP_RESOL / HZ (local) or LP_RESOL / remote HZ. */
#define LP_RESOL 1000
40+
41+
/**
 * enum tcp_lp_state - bit flags stored in &struct lp.flag
 * @LP_VALID_RHZ: the remote HZ estimate is usable
 * @LP_VALID_OWD: the last one-way-delay sample is usable
 * @LP_WITHIN_THR: smoothed OWD is below the congestion threshold
 * @LP_WITHIN_INF: we are still inside the inference period after a drop
 *
 * The flags are kept as a bit mask mainly to ease debugging.
 * Note that bit 2 (0x04) is not assigned.
 */
enum tcp_lp_state {
	LP_VALID_RHZ  = 0x01,
	LP_VALID_OWD  = 0x02,
	LP_WITHIN_THR = 0x08,
	LP_WITHIN_INF = 0x10,
};
57+
58+
/**
59+
* struct lp
60+
* @flag: TCP-LP state flag
61+
* @sowd: smoothed OWD << 3
62+
* @owd_min: min OWD
63+
* @owd_max: max OWD
64+
* @owd_max_rsv: resrved max owd
65+
* @remote_hz: estimated remote HZ
66+
* @remote_ref_time: remote reference time
67+
* @local_ref_time: local reference time
68+
* @last_drop: time for last active drop
69+
* @inference: current inference
70+
*
71+
* TCP-LP's private struct.
72+
* We get the idea from original TCP-LP implementation where only left those we
73+
* found are really useful.
74+
*/
75+
struct lp {
76+
u32 flag;
77+
u32 sowd;
78+
u32 owd_min;
79+
u32 owd_max;
80+
u32 owd_max_rsv;
81+
u32 remote_hz;
82+
u32 remote_ref_time;
83+
u32 local_ref_time;
84+
u32 last_drop;
85+
u32 inference;
86+
};
87+
88+
/**
89+
* tcp_lp_init
90+
*
91+
* Init all required variables.
92+
* Clone the handling from Vegas module implementation.
93+
*/
94+
static void tcp_lp_init(struct sock *sk)
95+
{
96+
struct lp *lp = inet_csk_ca(sk);
97+
98+
lp->flag = 0;
99+
lp->sowd = 0;
100+
lp->owd_min = 0xffffffff;
101+
lp->owd_max = 0;
102+
lp->owd_max_rsv = 0;
103+
lp->remote_hz = 0;
104+
lp->remote_ref_time = 0;
105+
lp->local_ref_time = 0;
106+
lp->last_drop = 0;
107+
lp->inference = 0;
108+
}
109+
110+
/**
111+
* tcp_lp_cong_avoid
112+
*
113+
* Implementation of cong_avoid.
114+
* Will only call newReno CA when away from inference.
115+
* From TCP-LP's paper, this will be handled in additive increasement.
116+
*/
117+
static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
118+
int flag)
119+
{
120+
struct lp *lp = inet_csk_ca(sk);
121+
122+
if (!(lp->flag & LP_WITHIN_INF))
123+
tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
124+
}
125+
126+
/**
127+
* tcp_lp_remote_hz_estimator
128+
*
129+
* Estimate remote HZ.
130+
* We keep on updating the estimated value, where original TCP-LP
131+
* implementation only guest it for once and use forever.
132+
*/
133+
static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
134+
{
135+
struct tcp_sock *tp = tcp_sk(sk);
136+
struct lp *lp = inet_csk_ca(sk);
137+
s64 rhz = lp->remote_hz << 6; /* remote HZ << 6 */
138+
s64 m = 0;
139+
140+
/* not yet record reference time
141+
* go away!! record it before come back!! */
142+
if (lp->remote_ref_time == 0 || lp->local_ref_time == 0)
143+
goto out;
144+
145+
/* we can't calc remote HZ with no different!! */
146+
if (tp->rx_opt.rcv_tsval == lp->remote_ref_time
147+
|| tp->rx_opt.rcv_tsecr == lp->local_ref_time)
148+
goto out;
149+
150+
m = HZ * (tp->rx_opt.rcv_tsval -
151+
lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr -
152+
lp->local_ref_time);
153+
if (m < 0)
154+
m = -m;
155+
156+
if (rhz != 0) {
157+
m -= rhz >> 6; /* m is now error in remote HZ est */
158+
rhz += m; /* 63/64 old + 1/64 new */
159+
} else
160+
rhz = m << 6;
161+
162+
/* record time for successful remote HZ calc */
163+
lp->flag |= LP_VALID_RHZ;
164+
165+
out:
166+
/* record reference time stamp */
167+
lp->remote_ref_time = tp->rx_opt.rcv_tsval;
168+
lp->local_ref_time = tp->rx_opt.rcv_tsecr;
169+
170+
return rhz >> 6;
171+
}
172+
173+
/**
174+
* tcp_lp_owd_calculator
175+
*
176+
* Calculate one way delay (in relative format).
177+
* Original implement OWD as minus of remote time difference to local time
178+
* difference directly. As this time difference just simply equal to RTT, when
179+
* the network status is stable, remote RTT will equal to local RTT, and result
180+
* OWD into zero.
181+
* It seems to be a bug and so we fixed it.
182+
*/
183+
static u32 tcp_lp_owd_calculator(struct sock *sk)
184+
{
185+
struct tcp_sock *tp = tcp_sk(sk);
186+
struct lp *lp = inet_csk_ca(sk);
187+
s64 owd = 0;
188+
189+
lp->remote_hz = tcp_lp_remote_hz_estimator(sk);
190+
191+
if (lp->flag & LP_VALID_RHZ) {
192+
owd =
193+
tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) -
194+
tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ);
195+
if (owd < 0)
196+
owd = -owd;
197+
}
198+
199+
if (owd > 0)
200+
lp->flag |= LP_VALID_OWD;
201+
else
202+
lp->flag &= ~LP_VALID_OWD;
203+
204+
return owd;
205+
}
206+
207+
/**
208+
* tcp_lp_rtt_sample
209+
*
210+
* Implementation or rtt_sample.
211+
* Will take the following action,
212+
* 1. calc OWD,
213+
* 2. record the min/max OWD,
214+
* 3. calc smoothed OWD (SOWD).
215+
* Most ideas come from the original TCP-LP implementation.
216+
*/
217+
static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
218+
{
219+
struct lp *lp = inet_csk_ca(sk);
220+
s64 mowd = tcp_lp_owd_calculator(sk);
221+
222+
/* sorry that we don't have valid data */
223+
if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD))
224+
return;
225+
226+
/* record the next min owd */
227+
if (mowd < lp->owd_min)
228+
lp->owd_min = mowd;
229+
230+
/* always forget the max of the max
231+
* we just set owd_max as one below it */
232+
if (mowd > lp->owd_max) {
233+
if (mowd > lp->owd_max_rsv) {
234+
if (lp->owd_max_rsv == 0)
235+
lp->owd_max = mowd;
236+
else
237+
lp->owd_max = lp->owd_max_rsv;
238+
lp->owd_max_rsv = mowd;
239+
} else
240+
lp->owd_max = mowd;
241+
}
242+
243+
/* calc for smoothed owd */
244+
if (lp->sowd != 0) {
245+
mowd -= lp->sowd >> 3; /* m is now error in owd est */
246+
lp->sowd += mowd; /* owd = 7/8 owd + 1/8 new */
247+
} else
248+
lp->sowd = mowd << 3; /* take the measured time be owd */
249+
}
250+
251+
/**
252+
* tcp_lp_pkts_acked
253+
*
254+
* Implementation of pkts_acked.
255+
* Deal with active drop under Early Congestion Indication.
256+
* Only drop to half and 1 will be handle, because we hope to use back
257+
* newReno in increase case.
258+
* We work it out by following the idea from TCP-LP's paper directly
259+
*/
260+
static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
261+
{
262+
struct tcp_sock *tp = tcp_sk(sk);
263+
struct lp *lp = inet_csk_ca(sk);
264+
265+
/* calc inference */
266+
if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
267+
lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
268+
269+
/* test if within inference */
270+
if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference))
271+
lp->flag |= LP_WITHIN_INF;
272+
else
273+
lp->flag &= ~LP_WITHIN_INF;
274+
275+
/* test if within threshold */
276+
if (lp->sowd >> 3 <
277+
lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100)
278+
lp->flag |= LP_WITHIN_THR;
279+
else
280+
lp->flag &= ~LP_WITHIN_THR;
281+
282+
pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag,
283+
tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max,
284+
lp->sowd >> 3);
285+
286+
if (lp->flag & LP_WITHIN_THR)
287+
return;
288+
289+
/* FIXME: try to reset owd_min and owd_max here
290+
* so decrease the chance the min/max is no longer suitable
291+
* and will usually within threshold when whithin inference */
292+
lp->owd_min = lp->sowd >> 3;
293+
lp->owd_max = lp->sowd >> 2;
294+
lp->owd_max_rsv = lp->sowd >> 2;
295+
296+
/* happened within inference
297+
* drop snd_cwnd into 1 */
298+
if (lp->flag & LP_WITHIN_INF)
299+
tp->snd_cwnd = 1U;
300+
301+
/* happened after inference
302+
* cut snd_cwnd into half */
303+
else
304+
tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
305+
306+
/* record this drop time */
307+
lp->last_drop = tcp_time_stamp;
308+
}
309+
310+
static struct tcp_congestion_ops tcp_lp = {
311+
.init = tcp_lp_init,
312+
.ssthresh = tcp_reno_ssthresh,
313+
.cong_avoid = tcp_lp_cong_avoid,
314+
.min_cwnd = tcp_reno_min_cwnd,
315+
.rtt_sample = tcp_lp_rtt_sample,
316+
.pkts_acked = tcp_lp_pkts_acked,
317+
318+
.owner = THIS_MODULE,
319+
.name = "lp"
320+
};
321+
322+
/* Module entry point: register TCP-LP as a congestion control algorithm.
 * Returns the result of tcp_register_congestion_control(). */
static int __init tcp_lp_register(void)
{
	/* struct lp must not exceed ICSK_CA_PRIV_SIZE - presumably the size
	 * of the area handed out by inet_csk_ca(); verify */
	BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);

	return tcp_register_congestion_control(&tcp_lp);
}
327+
328+
/* Module exit point: remove TCP-LP from the registered algorithms. */
static void __exit tcp_lp_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_lp);
}
332+
333+
module_init(tcp_lp_register);
334+
module_exit(tcp_lp_unregister);
335+
336+
MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun");
337+
MODULE_LICENSE("GPL");
338+
MODULE_DESCRIPTION("TCP Low Priority");

0 commit comments

Comments
 (0)