
Commit 6f80fcd

Merge branch 'mlxsw-use-page-pool-for-rx-buffers-allocation'
Petr Machata says:

====================
mlxsw: Use page pool for Rx buffers allocation

Amit Cohen writes:

After using NAPI to process events from hardware, the next step is to use
page pool for Rx buffer allocation, which also enhances performance. To
simplify this change, first use page pool to allocate one continuous buffer
for each packet; later, memory consumption can be improved by using
fragmented buffers.

This set significantly enhances mlxsw driver performance: the CPU can now
handle about 370% of the packets per second it previously handled.

The next planned improvement is using XDP to optimize telemetry.

Patch set overview:
Patches #1-#2 are small preparations for page pool usage
Patch #3 initializes page pool, but does not use it
Patch #4 converts the driver to use page pool for buffer allocations
Patch #5 is an optimization for buffer access
Patch #6 cleans up an unused structure
Patch #7 uses napi_consume_skb() as part of Tx completion
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 89f5e60 + d94ae64 commit 6f80fcd
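For readers less familiar with the page pool API this series adopts, the sketch below illustrates the Rx flow the cover letter describes: create a pool whose pages each hold one full packet, hand a freshly allocated, pre-mapped page to the hardware for every Rx descriptor, and on completion build an skb around the filled page and mark it for recycling. This is a minimal, hypothetical illustration rather than the mlxsw code itself; the my_rx_ring structure, the my_* helpers and the MY_MAX_PKT size are invented for the example, while the page_pool_*, napi_build_skb() and skb_mark_for_recycle() calls are the kernel APIs the patches actually use.

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/page_pool/helpers.h>

#define MY_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
#define MY_MAX_PKT	10240	/* assumed maximum frame size, for illustration only */

struct my_rx_ring {
	struct page_pool *pool;		/* one pool per Rx completion queue */
	struct napi_struct *napi;	/* NAPI context that consumes the pool */
	struct device *dev;
};

/* Create a pool whose pages are large enough for one full packet plus
 * headroom and skb_shared_info, pre-mapped for device Rx DMA.
 */
static int my_rx_pool_create(struct my_rx_ring *ring, unsigned int ring_size)
{
	struct page_pool_params pp = {
		.order		= get_order(MY_MAX_PKT + MY_HEADROOM +
					    SKB_DATA_ALIGN(sizeof(struct skb_shared_info))),
		.flags		= PP_FLAG_DMA_MAP,	/* pool handles DMA mapping */
		.pool_size	= ring_size,
		.nid		= dev_to_node(ring->dev),
		.dev		= ring->dev,
		.napi		= ring->napi,
		.dma_dir	= DMA_FROM_DEVICE,
	};

	ring->pool = page_pool_create(&pp);
	return IS_ERR(ring->pool) ? PTR_ERR(ring->pool) : 0;
}

/* Refill one Rx descriptor: hand the device a DMA address that skips the
 * headroom reserved for the stack.
 */
static struct page *my_refill_one(struct my_rx_ring *ring, dma_addr_t *dma)
{
	struct page *page = page_pool_dev_alloc_pages(ring->pool);

	if (unlikely(!page))
		return NULL;
	*dma = page_pool_get_dma_addr(page) + MY_HEADROOM;
	return page;
}

/* Completion path: wrap the filled page in an skb without copying and mark
 * it for recycling so freeing the skb returns the page to the pool.
 */
static struct sk_buff *my_build_rx_skb(struct page *page, u16 byte_count)
{
	struct sk_buff *skb = napi_build_skb(page_address(page), page_size(page));

	if (unlikely(!skb))
		return NULL;
	skb_reserve(skb, MY_HEADROOM);
	skb_put(skb, byte_count);
	skb_mark_for_recycle(skb);
	return skb;
}

As the cover letter notes, using one continuous (possibly high-order) buffer per packet keeps the conversion simple; a follow-up can switch to fragmented buffers to reduce memory consumption.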

File tree

2 files changed: +142, -58 lines changed


drivers/net/ethernet/mellanox/mlxsw/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ config MLXSW_CORE_THERMAL
 config MLXSW_PCI
 	tristate "PCI bus implementation for Mellanox Technologies Switch ASICs"
 	depends on PCI && HAS_IOMEM && MLXSW_CORE
+	select PAGE_POOL
 	default m
 	help
 	  This is PCI bus implementation for Mellanox Technologies Switch ASICs.

drivers/net/ethernet/mellanox/mlxsw/pci.c

Lines changed: 141 additions & 58 deletions
@@ -13,6 +13,7 @@
 #include <linux/if_vlan.h>
 #include <linux/log2.h>
 #include <linux/string.h>
+#include <net/page_pool/helpers.h>

 #include "pci_hw.h"
 #include "pci.h"
@@ -61,15 +62,11 @@ struct mlxsw_pci_mem_item {
 };

 struct mlxsw_pci_queue_elem_info {
+	struct page *page;
 	char *elem; /* pointer to actual dma mapped element mem chunk */
-	union {
-		struct {
-			struct sk_buff *skb;
-		} sdq;
-		struct {
-			struct sk_buff *skb;
-		} rdq;
-	} u;
+	struct {
+		struct sk_buff *skb;
+	} sdq;
 };

 struct mlxsw_pci_queue {
@@ -88,10 +85,14 @@ struct mlxsw_pci_queue {
 			enum mlxsw_pci_cqe_v v;
 			struct mlxsw_pci_queue *dq;
 			struct napi_struct napi;
+			struct page_pool *page_pool;
 		} cq;
 		struct {
 			struct tasklet_struct tasklet;
 		} eq;
+		struct {
+			struct mlxsw_pci_queue *cq;
+		} rdq;
 	} u;
 };

@@ -335,6 +336,25 @@ static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci,
 	mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num);
 }

+#define MLXSW_PCI_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)
+
+#define MLXSW_PCI_RX_BUF_SW_OVERHEAD		\
+		(MLXSW_PCI_SKB_HEADROOM +	\
+		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+static void
+mlxsw_pci_wqe_rx_frag_set(struct mlxsw_pci *mlxsw_pci, struct page *page,
+			  char *wqe, int index, size_t frag_len)
+{
+	dma_addr_t mapaddr;
+
+	mapaddr = page_pool_get_dma_addr(page);
+	mapaddr += MLXSW_PCI_SKB_HEADROOM;
+
+	mlxsw_pci_wqe_address_set(wqe, index, mapaddr);
+	mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len);
+}
+
 static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe,
 				  int index, char *frag_data, size_t frag_len,
 				  int direction)
@@ -364,43 +384,47 @@ static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe,
 	dma_unmap_single(&pdev->dev, mapaddr, frag_len, direction);
 }

-static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
-				   struct mlxsw_pci_queue_elem_info *elem_info,
-				   gfp_t gfp)
+static struct sk_buff *mlxsw_pci_rdq_build_skb(struct page *page,
+					       u16 byte_count)
 {
+	void *data = page_address(page);
+	unsigned int allocated_size;
+	struct sk_buff *skb;
+
+	net_prefetch(data);
+	allocated_size = page_size(page);
+	skb = napi_build_skb(data, allocated_size);
+	if (unlikely(!skb))
+		return ERR_PTR(-ENOMEM);
+
+	skb_reserve(skb, MLXSW_PCI_SKB_HEADROOM);
+	skb_put(skb, byte_count);
+	return skb;
+}
+
+static int mlxsw_pci_rdq_page_alloc(struct mlxsw_pci_queue *q,
+				    struct mlxsw_pci_queue_elem_info *elem_info)
+{
+	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
 	size_t buf_len = MLXSW_PORT_MAX_MTU;
 	char *wqe = elem_info->elem;
-	struct sk_buff *skb;
-	int err;
+	struct page *page;

-	skb = __netdev_alloc_skb_ip_align(NULL, buf_len, gfp);
-	if (!skb)
+	page = page_pool_dev_alloc_pages(cq->u.cq.page_pool);
+	if (unlikely(!page))
 		return -ENOMEM;

-	err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
-				     buf_len, DMA_FROM_DEVICE);
-	if (err)
-		goto err_frag_map;
-
-	elem_info->u.rdq.skb = skb;
+	mlxsw_pci_wqe_rx_frag_set(q->pci, page, wqe, 0, buf_len);
+	elem_info->page = page;
 	return 0;
-
-err_frag_map:
-	dev_kfree_skb_any(skb);
-	return err;
 }

-static void mlxsw_pci_rdq_skb_free(struct mlxsw_pci *mlxsw_pci,
-				   struct mlxsw_pci_queue_elem_info *elem_info)
+static void mlxsw_pci_rdq_page_free(struct mlxsw_pci_queue *q,
+				    struct mlxsw_pci_queue_elem_info *elem_info)
 {
-	struct sk_buff *skb;
-	char *wqe;
-
-	skb = elem_info->u.rdq.skb;
-	wqe = elem_info->elem;
+	struct mlxsw_pci_queue *cq = q->u.rdq.cq;

-	mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
-	dev_kfree_skb_any(skb);
+	page_pool_put_page(cq->u.cq.page_pool, elem_info->page, -1, false);
 }

 static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
@@ -434,13 +458,14 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,

 	cq = mlxsw_pci_cq_get(mlxsw_pci, cq_num);
 	cq->u.cq.dq = q;
+	q->u.rdq.cq = cq;

 	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);

 	for (i = 0; i < q->count; i++) {
 		elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
 		BUG_ON(!elem_info);
-		err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_KERNEL);
+		err = mlxsw_pci_rdq_page_alloc(q, elem_info);
 		if (err)
 			goto rollback;
 		/* Everything is set up, ring doorbell to pass elem to HW */
@@ -453,8 +478,9 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 rollback:
 	for (i--; i >= 0; i--) {
 		elem_info = mlxsw_pci_queue_elem_info_get(q, i);
-		mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
+		mlxsw_pci_rdq_page_free(q, elem_info);
 	}
+	q->u.rdq.cq = NULL;
 	cq->u.cq.dq = NULL;
 	mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);

@@ -470,7 +496,7 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
 	mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
 	for (i = 0; i < q->count; i++) {
 		elem_info = mlxsw_pci_queue_elem_info_get(q, i);
-		mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
+		mlxsw_pci_rdq_page_free(q, elem_info);
 	}
 }

@@ -515,7 +541,7 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
 				     struct mlxsw_pci_queue *q,
 				     u16 consumer_counter_limit,
 				     enum mlxsw_pci_cqe_v cqe_v,
-				     char *cqe)
+				     char *cqe, int budget)
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
 	struct mlxsw_pci_queue_elem_info *elem_info;
@@ -526,8 +552,8 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,

 	spin_lock(&q->lock);
 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
-	tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info;
-	skb = elem_info->u.sdq.skb;
+	tx_info = mlxsw_skb_cb(elem_info->sdq.skb)->tx_info;
+	skb = elem_info->sdq.skb;
 	wqe = elem_info->elem;
 	for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
 		mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
@@ -541,8 +567,8 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
 	}

 	if (skb)
-		dev_kfree_skb_any(skb);
-	elem_info->u.sdq.skb = NULL;
+		napi_consume_skb(skb, budget);
+	elem_info->sdq.skb = NULL;

 	if (q->consumer_counter++ != consumer_counter_limit)
 		dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in SDQ\n");
@@ -605,26 +631,38 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
 	struct mlxsw_pci_queue_elem_info *elem_info;
+	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
 	struct mlxsw_rx_info rx_info = {};
-	char wqe[MLXSW_PCI_WQE_SIZE];
 	struct sk_buff *skb;
+	struct page *page;
 	u16 byte_count;
 	int err;

 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
-	skb = elem_info->u.rdq.skb;
-	memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE);

 	if (q->consumer_counter++ != consumer_counter_limit)
 		dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");

-	err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_ATOMIC);
+	byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
+	if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
+		byte_count -= ETH_FCS_LEN;
+
+	page = elem_info->page;
+
+	err = mlxsw_pci_rdq_page_alloc(q, elem_info);
 	if (err) {
-		dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+		dev_err_ratelimited(&pdev->dev, "Failed to alloc page\n");
+		goto out;
+	}
+
+	skb = mlxsw_pci_rdq_build_skb(page, byte_count);
+	if (IS_ERR(skb)) {
+		dev_err_ratelimited(&pdev->dev, "Failed to build skb for RDQ\n");
+		page_pool_recycle_direct(cq->u.cq.page_pool, page);
 		goto out;
 	}

-	mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+	skb_mark_for_recycle(skb);

 	if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
 		rx_info.is_lag = true;
@@ -657,10 +695,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,

 	mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe);

-	byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
-	if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
-		byte_count -= ETH_FCS_LEN;
-	skb_put(skb, byte_count);
 	mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);

 out:
@@ -785,7 +819,7 @@ static int mlxsw_pci_napi_poll_cq_tx(struct napi_struct *napi, int budget)
 		mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);

 		mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq,
-					 wqe_counter, q->u.cq.v, ncqe);
+					 wqe_counter, q->u.cq.v, ncqe, budget);

 		work_done++;
 	}
@@ -832,19 +866,54 @@ static void mlxsw_pci_cq_napi_setup(struct mlxsw_pci_queue *q,
 			       mlxsw_pci_napi_poll_cq_rx);
 		break;
 	}
-
-	napi_enable(&q->u.cq.napi);
 }

 static void mlxsw_pci_cq_napi_teardown(struct mlxsw_pci_queue *q)
 {
-	napi_disable(&q->u.cq.napi);
 	netif_napi_del(&q->u.cq.napi);
 }

+static int mlxsw_pci_cq_page_pool_init(struct mlxsw_pci_queue *q,
+				       enum mlxsw_pci_cq_type cq_type)
+{
+	struct page_pool_params pp_params = {};
+	struct mlxsw_pci *mlxsw_pci = q->pci;
+	struct page_pool *page_pool;
+	u32 max_pkt_size;
+
+	if (cq_type != MLXSW_PCI_CQ_RDQ)
+		return 0;
+
+	max_pkt_size = MLXSW_PORT_MAX_MTU + MLXSW_PCI_RX_BUF_SW_OVERHEAD;
+	pp_params.order = get_order(max_pkt_size);
+	pp_params.flags = PP_FLAG_DMA_MAP;
+	pp_params.pool_size = MLXSW_PCI_WQE_COUNT;
+	pp_params.nid = dev_to_node(&mlxsw_pci->pdev->dev);
+	pp_params.dev = &mlxsw_pci->pdev->dev;
+	pp_params.napi = &q->u.cq.napi;
+	pp_params.dma_dir = DMA_FROM_DEVICE;
+
+	page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(page_pool))
+		return PTR_ERR(page_pool);
+
+	q->u.cq.page_pool = page_pool;
+	return 0;
+}
+
+static void mlxsw_pci_cq_page_pool_fini(struct mlxsw_pci_queue *q,
+					enum mlxsw_pci_cq_type cq_type)
+{
+	if (cq_type != MLXSW_PCI_CQ_RDQ)
+		return;
+
+	page_pool_destroy(q->u.cq.page_pool);
+}
+
 static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 			     struct mlxsw_pci_queue *q)
 {
+	enum mlxsw_pci_cq_type cq_type = mlxsw_pci_cq_type(mlxsw_pci, q);
 	int i;
 	int err;

@@ -874,15 +943,29 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num);
 	if (err)
 		return err;
-	mlxsw_pci_cq_napi_setup(q, mlxsw_pci_cq_type(mlxsw_pci, q));
+	mlxsw_pci_cq_napi_setup(q, cq_type);
+
+	err = mlxsw_pci_cq_page_pool_init(q, cq_type);
+	if (err)
+		goto err_page_pool_init;
+
+	napi_enable(&q->u.cq.napi);
 	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
 	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
 	return 0;
+
+err_page_pool_init:
+	mlxsw_pci_cq_napi_teardown(q);
+	return err;
 }

 static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci,
 			      struct mlxsw_pci_queue *q)
 {
+	enum mlxsw_pci_cq_type cq_type = mlxsw_pci_cq_type(mlxsw_pci, q);
+
+	napi_disable(&q->u.cq.napi);
+	mlxsw_pci_cq_page_pool_fini(q, cq_type);
 	mlxsw_pci_cq_napi_teardown(q);
 	mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num);
 }
@@ -1919,7 +2002,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
 		goto unlock;
 	}
 	mlxsw_skb_cb(skb)->tx_info = *tx_info;
-	elem_info->u.sdq.skb = skb;
+	elem_info->sdq.skb = skb;

 	wqe = elem_info->elem;
 	mlxsw_pci_wqe_c_set(wqe, 1); /* always report completion */
