cxgb4: Update IngPad and IngPack values

We are using the smallest padding boundary (8 bytes) which isn't
smaller than the Memory Controller Read/Write Size.

We get the best performance in 100G when the Packing Boundary is a
multiple of the Maximum Payload Size. This is related to inefficient
chopping of DMA packets by PCIe, which causes more overhead on the bus.
So the driver helps by aligning the starting address to the MPS size.

We will try to determine the PCIe MaxPayloadSize capability and set
IngPackBoundary based on this value. If the cache line size is greater
than the MPS, or determining the MPS fails, we will use the cache line
size to determine IngPackBoundary (as before).

Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Arjun Vynipadath 2017-03-20 14:22:38 +05:30 committed by David S. Miller
parent 3588f29e06
commit bb58d07964
2 changed files with 61 additions and 20 deletions

View File

@ -6369,7 +6369,6 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
unsigned int stat_len = cache_line_size > 64 ? 128 : 64;
unsigned int fl_align = cache_line_size < 32 ? 32 : cache_line_size;
unsigned int fl_align_log = fls(fl_align) - 1;
unsigned int ingpad;
t4_write_reg(adap, SGE_HOST_PAGE_SIZE_A,
HOSTPAGESIZEPF0_V(sge_hps) |
@ -6389,6 +6388,10 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
INGPADBOUNDARY_SHIFT_X) |
EGRSTATUSPAGESIZE_V(stat_len != 64));
} else {
unsigned int pack_align;
unsigned int ingpad, ingpack;
unsigned int pcie_cap;
/* T5 introduced the separation of the Free List Padding and
* Packing Boundaries. Thus, we can select a smaller Padding
* Boundary to avoid uselessly chewing up PCIe Link and Memory
@ -6401,27 +6404,62 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
* Size (the minimum unit of transfer to/from Memory). If we
* have a Padding Boundary which is smaller than the Memory
* Line Size, that'll involve a Read-Modify-Write cycle on the
* Memory Controller which is never good. For T5 the smallest
* Padding Boundary which we can select is 32 bytes which is
* larger than any known Memory Controller Line Size so we'll
* use that.
*
* T5 has a different interpretation of the "0" value for the
* Packing Boundary. This corresponds to 16 bytes instead of
* the expected 32 bytes. We never have a Packing Boundary
* less than 32 bytes so we can't use that special value but
* on the other hand, if we wanted 32 bytes, the best we can
* really do is 64 bytes.
*/
if (fl_align <= 32) {
fl_align = 64;
fl_align_log = 6;
* Memory Controller which is never good.
*/
/* We want the Packing Boundary to be based on the Cache Line
* Size in order to help avoid False Sharing performance
* issues between CPUs, etc. We also want the Packing
* Boundary to incorporate the PCI-E Maximum Payload Size. We
* get best performance when the Packing Boundary is a
* multiple of the Maximum Payload Size.
*/
pack_align = fl_align;
pcie_cap = pci_find_capability(adap->pdev, PCI_CAP_ID_EXP);
if (pcie_cap) {
unsigned int mps, mps_log;
u16 devctl;
/* The PCIe Device Control Maximum Payload Size field
* [bits 7:5] encodes sizes as powers of 2 starting at
* 128 bytes.
*/
pci_read_config_word(adap->pdev,
pcie_cap + PCI_EXP_DEVCTL,
&devctl);
mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
mps = 1 << mps_log;
if (mps > pack_align)
pack_align = mps;
}
/* N.B. T5/T6 have a crazy special interpretation of the "0"
* value for the Packing Boundary. This corresponds to 16
* bytes instead of the expected 32 bytes. So if we want 32
* bytes, the best we can really do is 64 bytes ...
*/
if (pack_align <= 16) {
ingpack = INGPACKBOUNDARY_16B_X;
fl_align = 16;
} else if (pack_align == 32) {
ingpack = INGPACKBOUNDARY_64B_X;
fl_align = 64;
} else {
unsigned int pack_align_log = fls(pack_align) - 1;
ingpack = pack_align_log - INGPACKBOUNDARY_SHIFT_X;
fl_align = pack_align;
}
/* Use the smallest Ingress Padding which isn't smaller than
* the Memory Controller Read/Write Size. We'll take that as
* being 8 bytes since we don't know of any system with a
* wider Memory Controller Bus Width.
*/
if (is_t5(adap->params.chip))
ingpad = INGPCIEBOUNDARY_32B_X;
ingpad = INGPADBOUNDARY_32B_X;
else
ingpad = T6_INGPADBOUNDARY_32B_X;
ingpad = T6_INGPADBOUNDARY_8B_X;
t4_set_reg_field(adap, SGE_CONTROL_A,
INGPADBOUNDARY_V(INGPADBOUNDARY_M) |
@ -6430,8 +6468,7 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
EGRSTATUSPAGESIZE_V(stat_len != 64));
t4_set_reg_field(adap, SGE_CONTROL2_A,
INGPACKBOUNDARY_V(INGPACKBOUNDARY_M),
INGPACKBOUNDARY_V(fl_align_log -
INGPACKBOUNDARY_SHIFT_X));
INGPACKBOUNDARY_V(ingpack));
}
/*
* Adjust various SGE Free List Host Buffer Sizes.

View File

@ -54,11 +54,15 @@
#define INGPADBOUNDARY_SHIFT_X 5
#define T6_INGPADBOUNDARY_SHIFT_X 3
#define T6_INGPADBOUNDARY_8B_X 0
#define T6_INGPADBOUNDARY_32B_X 2
#define INGPADBOUNDARY_32B_X 0
/* CONTROL2 register */
#define INGPACKBOUNDARY_SHIFT_X 5
#define INGPACKBOUNDARY_16B_X 0
#define INGPACKBOUNDARY_64B_X 1
/* GTS register */
#define SGE_TIMERREGS 6