Skip to content

Commit 59c55ff

Browse files
author
Octavian Purdila
authored
Merge pull request torvalds#183 from hkchu/offload
lkl: Add offload (TSO4, CSUM) support to LKL device code
2 parents 35df18a + b8f6fad commit 59c55ff

File tree

11 files changed

+147
-74
lines changed

11 files changed

+147
-74
lines changed

Documentation/lkl.txt

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,12 @@ are the list of those variable for your environment.
198198
```
199199
* LKL_HIJACK_DEBUG
200200

201-
increase the verbose level of debug information.
201+
Setting it causes some debug information (both from the kernel and the
202+
LKL library) to be enabled.
203+
It is also used as a bit mask to turn on specific debugging facilities.
204+
E.g., setting it to 0x100 ("export LKL_HIJACK_DEBUG=0x100") will cause
205+
the LKL kernel to pause after the hijacked app exits. This allows one
206+
to debug or collect info from the LKL kernel before it quits.
202207
```
203208
$ LKL_HIJACK_DEBUG=1 lkl-hijack.sh ip address show
204209
```
@@ -210,6 +215,18 @@ are the list of those variable for your environment.
210215
```
211216
$ LKL_HIJACK_SINGLE_CPU=1 lkl-hijack.sh ip address show
212217
```
218+
* LKL_HIJACK_OFFLOAD
219+
220+
Works as a bit mask to enable selective device offload features. E.g.,
221+
to enable "mergeable RX buffer" (LKL_VIRTIO_NET_F_MRG_RXBUF) +
222+
"guest csum" (LKL_VIRTIO_NET_F_GUEST_CSUM) device features, simply set
223+
it to 0x8002.
224+
225+
See virtio_net.h for a list of offload features and their bit masks.
226+
```
227+
$ LKL_HIJACK_OFFLOAD=0x8002 lkl-hijack.sh ./netserver -D -f
228+
```
229+
213230
FAQ
214231
===
215232

tools/lkl/include/lkl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ struct lkl_netdev;
237237
* @returns a network device id (0 is valid) or a strictly negative value in
238238
* case of error
239239
*/
240-
int lkl_netdev_add(struct lkl_netdev *nd, void *mac);
240+
int lkl_netdev_add(struct lkl_netdev *nd, void *mac, int offload);
241241

242242
/**
243243
* lkl_netdevs_remove - destroy all network devices
@@ -283,7 +283,7 @@ int lkl_stop_syscall_thread(void);
283283
* @ifname - interface name for the TAP device. needs to be configured
284284
* on host in advance
285285
*/
286-
struct lkl_netdev *lkl_netdev_tap_create(const char *ifname);
286+
struct lkl_netdev *lkl_netdev_tap_create(const char *ifname, int offload);
287287

288288
/**
289289
* lkl_netdev_dpdk_create - create DPDK net_device for the virtio net backend

tools/lkl/include/lkl_host.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ extern char lkl_virtio_devs[256];
2121

2222
struct lkl_dev_buf {
2323
void *addr;
24-
unsigned int len;
24+
size_t len;
2525
};
2626

2727
extern struct lkl_dev_blk_ops lkl_dev_blk_ops;
@@ -50,6 +50,7 @@ struct lkl_dev_blk_ops {
5050
struct lkl_netdev {
5151
struct lkl_dev_net_ops *ops;
5252
lkl_thread_t rx_tid, tx_tid;
53+
uint8_t has_vnet_hdr: 1;
5354
};
5455

5556
struct lkl_dev_net_ops {
@@ -58,11 +59,11 @@ struct lkl_dev_net_ops {
5859
* The data buffer can only hold 0 or 1 complete packets.
5960
*
6061
* @nd - pointer to the network device
61-
* @data - pointer to the buffer
62-
* @len - size of the buffer in bytes
63-
* @returns 0 for success and -1 for failure.
62+
* @iov - pointer to the buffer vector
63+
* @cnt - # of vectors in iov.
64+
* @returns number of bytes transmitted
6465
*/
65-
int (*tx)(struct lkl_netdev *nd, void *data, int len);
66+
int (*tx)(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt);
6667
/* Reads a packet from the net device.
6768
*
6869
* It must only read one complete packet if present.
@@ -71,12 +72,11 @@ struct lkl_dev_net_ops {
7172
* decide to drop it or trim it.
7273
*
7374
* @nd - pointer to the network device
74-
* @data - pointer to the buffer to store the packet
75-
* @len - pointer to the maximum size of the buffer. Also stores the
76-
* real number of bytes read after return.
77-
* @returns 0 for success and -1 if nothing is read.
75+
* @iov - pointer to the buffer vector to store the packet
76+
* @cnt - # of vectors in iov.
77+
* @returns number of bytes read for success or < 0 if error
7878
*/
79-
int (*rx)(struct lkl_netdev *nd, void *data, int *len);
79+
int (*rx)(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt);
8080
#define LKL_DEV_NET_POLL_RX 1
8181
#define LKL_DEV_NET_POLL_TX 2
8282
/* Polls a net device.

tools/lkl/lib/hijack/init.c

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@ static void PinToFirstCpu(const cpu_set_t* cpus)
202202
}
203203
}
204204

205+
int lkl_debug;
206+
205207
void __attribute__((constructor(102)))
206208
hijack_init(void)
207209
{
@@ -234,11 +236,17 @@ hijack_init(void)
234236
char *single_cpu= getenv("LKL_HIJACK_SINGLE_CPU");
235237
int single_cpu_mode = 0;
236238
cpu_set_t ori_cpu;
239+
char *offload1 = getenv("LKL_HIJACK_OFFLOAD");
240+
int offload = 0;
237241

238-
if (!debug)
242+
if (!debug) {
239243
lkl_host_ops.print = NULL;
240-
else
244+
} else {
241245
lkl_register_dbg_handler();
246+
lkl_debug = strtol(debug, NULL, 0);
247+
}
248+
if (offload1)
249+
offload = strtol(offload1, NULL, 0);
242250

243251
if (single_cpu) {
244252
single_cpu_mode = atoi(single_cpu);
@@ -274,18 +282,28 @@ hijack_init(void)
274282
"WARN: variable LKL_HIJACK_NET_TAP is now obsoleted.\n"
275283
" please use LKL_HIJACK_NET_IFTYPE and "
276284
"LKL_HIJACK_NET_IFPARAMS instead.\n");
277-
nd = lkl_netdev_tap_create(tap);
285+
nd = lkl_netdev_tap_create(tap, offload);
278286
}
279287

280288
if (!nd && iftype && ifparams) {
281-
if ((strcmp(iftype, "tap") == 0))
282-
nd = lkl_netdev_tap_create(ifparams);
283-
else if (strcmp(iftype, "dpdk") == 0)
284-
nd = lkl_netdev_dpdk_create(ifparams);
285-
else if (strcmp(iftype, "vde") == 0)
286-
nd = lkl_netdev_vde_create(ifparams);
287-
else if (strcmp(iftype, "raw") == 0)
288-
nd = lkl_netdev_raw_create(ifparams);
289+
if ((strcmp(iftype, "tap") == 0)) {
290+
nd = lkl_netdev_tap_create(ifparams, offload);
291+
} else {
292+
if (offload) {
293+
fprintf(stderr,
294+
"WARN: LKL_HIJACK_OFFLOAD is only "
295+
"supported on tap device (for now)!\n"
296+
"No offload features will be "
297+
"enabled.\n");
298+
}
299+
offload = 0;
300+
if (strcmp(iftype, "dpdk") == 0)
301+
nd = lkl_netdev_dpdk_create(ifparams);
302+
else if (strcmp(iftype, "vde") == 0)
303+
nd = lkl_netdev_vde_create(ifparams);
304+
else if (strcmp(iftype, "raw") == 0)
305+
nd = lkl_netdev_raw_create(ifparams);
306+
}
289307
}
290308

291309
if (nd) {
@@ -295,9 +313,9 @@ hijack_init(void)
295313
fprintf(stderr, "failed to parse mac\n");
296314
return;
297315
} else if (ret > 0) {
298-
ret = lkl_netdev_add(nd, mac);
316+
ret = lkl_netdev_add(nd, mac, offload);
299317
} else {
300-
ret = lkl_netdev_add(nd, NULL);
318+
ret = lkl_netdev_add(nd, NULL, offload);
301319
}
302320

303321
if (ret < 0) {
@@ -388,6 +406,13 @@ hijack_fini(void)
388406
int i;
389407
char *dump = getenv("LKL_HIJACK_DUMP");
390408

409+
/* The following pauses the kernel before exiting allowing one
410+
* to debug or collect statistics/diagnostic info from it.
411+
*/
412+
if (lkl_debug & 0x100) {
413+
while (1)
414+
pause();
415+
}
391416
if (dump)
392417
mount_cmds_exec(dump, dump_file);
393418

tools/lkl/lib/virtio_net.c

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -73,36 +73,36 @@ static int net_enqueue(struct virtio_dev *dev, struct virtio_req *req)
7373
{
7474
struct lkl_virtio_net_hdr_v1 *header;
7575
struct virtio_net_dev *net_dev;
76-
int ret, len;
77-
void *buf;
76+
int ret;
77+
struct lkl_dev_buf iov[1];
7878

7979
header = req->buf[0].addr;
8080
net_dev = netdev_of(dev);
81-
len = req->buf[0].len - sizeof(*header);
81+
iov[0].len = req->buf[0].len - sizeof(*header);
8282

83-
buf = &header[1];
83+
iov[0].addr = &header[1];
8484

85-
if (!len && req->buf_count > 1) {
86-
buf = req->buf[1].addr;
87-
len = req->buf[1].len;
85+
if (!iov[0].len && req->buf_count > 1) {
86+
iov[0].addr = req->buf[1].addr;
87+
iov[0].len = req->buf[1].len;
8888
}
8989

9090
/* Pick which virtqueue to send the buffer(s) to */
9191
if (is_tx_queue(dev, req->q)) {
92-
ret = net_dev->ops->tx(net_dev->nd, buf, len);
92+
ret = net_dev->ops->tx(net_dev->nd, iov, 1);
9393
if (ret < 0)
9494
return -1;
9595
} else if (is_rx_queue(dev, req->q)) {
9696
header->num_buffers = 1;
97-
ret = net_dev->ops->rx(net_dev->nd, buf, &len);
97+
ret = net_dev->ops->rx(net_dev->nd, iov, 1);
9898
if (ret < 0)
9999
return -1;
100100
} else {
101101
bad_request("tried to push on non-existent queue");
102102
return -1;
103103
}
104104

105-
virtio_req_complete(req, len + sizeof(*header));
105+
virtio_req_complete(req, iov[0].len + sizeof(*header));
106106
return 0;
107107
}
108108

@@ -174,7 +174,7 @@ static struct lkl_mutex **init_queue_locks(int num_queues)
174174
return ret;
175175
}
176176

177-
int lkl_netdev_add(struct lkl_netdev *nd, void *mac)
177+
int lkl_netdev_add(struct lkl_netdev *nd, void *mac, int offload)
178178
{
179179
struct virtio_net_dev *dev;
180180
int ret = -LKL_ENOMEM;
@@ -188,6 +188,7 @@ int lkl_netdev_add(struct lkl_netdev *nd, void *mac)
188188
dev->dev.device_id = LKL_VIRTIO_ID_NET;
189189
if (mac)
190190
dev->dev.device_features |= BIT(LKL_VIRTIO_NET_F_MAC);
191+
dev->dev.device_features |= offload;
191192
dev->dev.config_data = &dev->config;
192193
dev->dev.config_len = sizeof(dev->config);
193194
dev->dev.ops = &net_ops;

tools/lkl/lib/virtio_net_dpdk.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,13 @@ struct lkl_netdev_dpdk {
5858
int bufidx;
5959
};
6060

61-
static int net_tx(struct lkl_netdev *nd, void *data, int len)
61+
static int net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt)
6262
{
6363
void *pkt;
6464
struct rte_mbuf *rm;
6565
struct lkl_netdev_dpdk *nd_dpdk;
66+
void *data = iov[0].addr;
67+
int len = (int)iov[0].len;
6668

6769
nd_dpdk = (struct lkl_netdev_dpdk *) nd;
6870

@@ -80,7 +82,7 @@ static int net_tx(struct lkl_netdev *nd, void *data, int len)
8082
/* XXX: should be bulk-transmitted !! */
8183
rte_eth_tx_burst(nd_dpdk->portid, 0, &rm, 1);
8284

83-
return 0;
85+
return len;
8486
}
8587

8688
/*
@@ -90,10 +92,12 @@ static int net_tx(struct lkl_netdev *nd, void *data, int len)
9092
* refactor allows us to read in parallel, the buffer (nd_dpdk->rms) shall
9193
* be guarded.
9294
*/
93-
static int net_rx(struct lkl_netdev *nd, void *data, int *len)
95+
static int net_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt)
9496
{
9597
struct lkl_netdev_dpdk *nd_dpdk;
9698
int i, nb_rx, read = 0;
99+
void *data = iov[0].addr;
100+
int len = (int)iov[0].len;
97101

98102
nd_dpdk = (struct lkl_netdev_dpdk *) nd;
99103

@@ -122,8 +126,8 @@ static int net_rx(struct lkl_netdev *nd, void *data, int *len)
122126
r_data = rte_pktmbuf_mtod(rm, void *);
123127
r_size = rte_pktmbuf_data_len(rm);
124128

125-
*len -= r_size;
126-
if (*len < 0) {
129+
len -= r_size;
130+
if (len < 0) {
127131
fprintf(stderr, "dpdk: buffer full. skip it\n");
128132
goto end;
129133
}
@@ -144,8 +148,7 @@ static int net_rx(struct lkl_netdev *nd, void *data, int *len)
144148
for (i = 0; i < nb_rx; i++)
145149
rte_pktmbuf_free(nd_dpdk->rms[i]);
146150

147-
*len = read;
148-
return 0;
151+
return read;
149152
}
150153

151154
static int net_poll(struct lkl_netdev *nd, int events)

tools/lkl/lib/virtio_net_linux_fdnet.c

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <string.h>
1616
#include <sys/epoll.h>
1717
#include <sys/eventfd.h>
18+
#include <sys/uio.h>
1819

1920
#include "virtio.h"
2021
#include "virtio_net_linux_fdnet.h"
@@ -31,40 +32,37 @@ struct lkl_netdev_linux_fdnet_ops lkl_netdev_linux_fdnet_ops = {
3132
#endif /* __NR_eventfd */
3233
};
3334

34-
static int linux_fdnet_net_tx(struct lkl_netdev *nd, void *data, int len)
35+
static int linux_fdnet_net_tx(struct lkl_netdev *nd,
36+
struct lkl_dev_buf *iov, int cnt)
3537
{
3638
int ret;
3739
struct lkl_netdev_linux_fdnet *nd_fdnet =
3840
container_of(nd, struct lkl_netdev_linux_fdnet, dev);
3941

4042
do {
41-
ret = write(nd_fdnet->fd, data, len);
43+
ret = writev(nd_fdnet->fd, (struct iovec *)iov, cnt);
4244
} while (ret == -1 && errno == EINVAL);
43-
if (ret > 0)
44-
return 0;
45+
4546
if (ret < 0 && errno != EAGAIN)
4647
perror("write to Linux fd netdev fails");
47-
48-
return -1;
48+
return ret;
4949
}
5050

51-
static int linux_fdnet_net_rx(struct lkl_netdev *nd, void *data, int *len)
51+
static int linux_fdnet_net_rx(struct lkl_netdev *nd,
52+
struct lkl_dev_buf *iov, int cnt)
5253
{
5354
int ret;
5455
struct lkl_netdev_linux_fdnet *nd_fdnet =
5556
container_of(nd, struct lkl_netdev_linux_fdnet, dev);
5657

5758
do {
58-
ret = read(nd_fdnet->fd, data, *len);
59+
ret = readv(nd_fdnet->fd, (struct iovec *)iov, cnt);
5960
} while (ret == -1 && errno == EINVAL);
60-
if (ret > 0) {
61-
*len = ret;
62-
return 0;
63-
}
61+
6462
if (ret < 0 && errno != EAGAIN)
6563
perror("read from fdnet device fails");
6664

67-
return -1;
65+
return ret;
6866
}
6967

7068
static int linux_fdnet_net_poll(struct lkl_netdev *nd, int events)

0 commit comments

Comments
 (0)