linux/dim: Move implementation to .c files

Moved all logic from dim.h and net_dim.h to dim.c and net_dim.c.
This is both more structurally appealing and would allow to only
expose externally used functions.

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
Tal Gilboa 2019-01-10 17:33:17 +02:00 committed by Saeed Mahameed
parent 8960b38932
commit 4f75da3666
17 changed files with 546 additions and 344 deletions

View File

@ -5588,8 +5588,8 @@ F: include/linux/dynamic_debug.h
DYNAMIC INTERRUPT MODERATION DYNAMIC INTERRUPT MODERATION
M: Tal Gilboa <talgi@mellanox.com> M: Tal Gilboa <talgi@mellanox.com>
S: Maintained S: Maintained
F: include/linux/net_dim.h
F: include/linux/dim.h F: include/linux/dim.h
F: lib/dim/
DZ DECSTATION DZ11 SERIAL DRIVER DZ DECSTATION DZ11 SERIAL DRIVER
M: "Maciej W. Rozycki" <macro@linux-mips.org> M: "Maciej W. Rozycki" <macro@linux-mips.org>

View File

@ -8,6 +8,7 @@ config NET_VENDOR_BROADCOM
default y default y
depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \ depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \
SIBYTE_SB1xxx_SOC SIBYTE_SB1xxx_SOC
select DIMLIB
---help--- ---help---
If you have a network (Ethernet) chipset belonging to this class, If you have a network (Ethernet) chipset belonging to this class,
say Y. say Y.

View File

@ -14,7 +14,7 @@
#include <linux/bitmap.h> #include <linux/bitmap.h>
#include <linux/ethtool.h> #include <linux/ethtool.h>
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/net_dim.h> #include <linux/dim.h>
/* Receive/transmit descriptor format */ /* Receive/transmit descriptor format */
#define DESC_ADDR_HI_STATUS_LEN 0x00 #define DESC_ADDR_HI_STATUS_LEN 0x00

View File

@ -23,7 +23,7 @@
#include <net/devlink.h> #include <net/devlink.h>
#include <net/dst_metadata.h> #include <net/dst_metadata.h>
#include <net/xdp.h> #include <net/xdp.h>
#include <linux/net_dim.h> #include <linux/dim.h>
struct tx_bd { struct tx_bd {
__le32 tx_bd_len_flags_type; __le32 tx_bd_len_flags_type;

View File

@ -11,7 +11,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/pci.h> #include <linux/pci.h>
#include "bnxt_hsi.h" #include "bnxt_hsi.h"
#include <linux/net_dim.h> #include <linux/dim.h>
#include "bnxt.h" #include "bnxt.h"
#include "bnxt_debugfs.h" #include "bnxt_debugfs.h"

View File

@ -7,7 +7,7 @@
* the Free Software Foundation. * the Free Software Foundation.
*/ */
#include <linux/net_dim.h> #include <linux/dim.h>
#include "bnxt_hsi.h" #include "bnxt_hsi.h"
#include "bnxt.h" #include "bnxt.h"

View File

@ -16,7 +16,7 @@
#include <linux/mii.h> #include <linux/mii.h>
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/phy.h> #include <linux/phy.h>
#include <linux/net_dim.h> #include <linux/dim.h>
/* total number of Buffer Descriptors, same for Rx/Tx */ /* total number of Buffer Descriptors, same for Rx/Tx */
#define TOTAL_DESC 256 #define TOTAL_DESC 256

View File

@ -34,6 +34,7 @@ config MLX5_CORE_EN
depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
depends on IPV6=y || IPV6=n || MLX5_CORE=m depends on IPV6=y || IPV6=n || MLX5_CORE=m
select PAGE_POOL select PAGE_POOL
select DIMLIB
default n default n
---help--- ---help---
Ethernet support in Mellanox Technologies ConnectX-4 NIC. Ethernet support in Mellanox Technologies ConnectX-4 NIC.

View File

@ -48,7 +48,7 @@
#include <linux/rhashtable.h> #include <linux/rhashtable.h>
#include <net/switchdev.h> #include <net/switchdev.h>
#include <net/xdp.h> #include <net/xdp.h>
#include <linux/net_dim.h> #include <linux/dim.h>
#include <linux/bits.h> #include <linux/bits.h>
#include "wq.h" #include "wq.h"
#include "mlx5_core.h" #include "mlx5_core.h"

View File

@ -30,7 +30,7 @@
* SOFTWARE. * SOFTWARE.
*/ */
#include <linux/net_dim.h> #include <linux/dim.h>
#include "en.h" #include "en.h"
static void static void

View File

@ -6,20 +6,49 @@
#include <linux/module.h> #include <linux/module.h>
/**
* Number of events between DIM iterations.
* Causes a moderation of the algorithm run.
*/
#define DIM_NEVENTS 64 #define DIM_NEVENTS 64
/* more than 10% difference */ /**
* Is a difference between values justifies taking an action.
* We consider 10% difference as significant.
*/
#define IS_SIGNIFICANT_DIFF(val, ref) \ #define IS_SIGNIFICANT_DIFF(val, ref) \
(((100UL * abs((val) - (ref))) / (ref)) > 10) (((100UL * abs((val) - (ref))) / (ref)) > 10)
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \
& (BIT_ULL(bits) - 1))
/**
* Calculate the gap between two values.
* Take wrap-around and variable size into consideration.
*/
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \
& (BIT_ULL(bits) - 1))
/**
* Structure for CQ moderation values.
* Used for communications between DIM and its consumer.
*
* @usec: CQ timer suggestion (by DIM)
* @pkts: CQ packet counter suggestion (by DIM)
* @cq_period_mode: CQ priod count mode (from CQE/EQE)
*/
struct dim_cq_moder { struct dim_cq_moder {
u16 usec; u16 usec;
u16 pkts; u16 pkts;
u8 cq_period_mode; u8 cq_period_mode;
}; };
/**
* Structure for DIM sample data.
* Used for communications between DIM and its consumer.
*
* @time: Sample timestamp
* @pkt_ctr: Number of packets
* @byte_ctr: Number of bytes
* @event_ctr: Number of events
*/
struct dim_sample { struct dim_sample {
ktime_t time; ktime_t time;
u32 pkt_ctr; u32 pkt_ctr;
@ -27,13 +56,36 @@ struct dim_sample {
u16 event_ctr; u16 event_ctr;
}; };
/**
* Structure for DIM stats.
* Used for holding current measured rates.
*
* @ppms: Packets per msec
* @bpms: Bytes per msec
* @epms: Events per msec
*/
struct dim_stats { struct dim_stats {
int ppms; /* packets per msec */ int ppms;
int bpms; /* bytes per msec */ int bpms;
int epms; /* events per msec */ int epms;
}; };
struct dim { /* Dynamic Interrupt Moderation */ /**
* Main structure for dynamic interrupt moderation (DIM).
* Used for holding all information about a specific DIM instance.
*
* @state: Algorithm state (see below)
* @prev_stats: Measured rates from previous iteration (for comparison)
* @start_sample: Sampled data at start of current iteration
* @work: Work to perform on action required
* @profile_ix: Current moderation profile
* @mode: CQ period count mode
* @tune_state: Algorithm tuning state (see below)
* @steps_right: Number of steps taken towards higher moderation
* @steps_left: Number of steps taken towards lower moderation
* @tired: Parking depth counter
*/
struct dim {
u8 state; u8 state;
struct dim_stats prev_stats; struct dim_stats prev_stats;
struct dim_sample start_sample; struct dim_sample start_sample;
@ -46,18 +98,49 @@ struct dim { /* Dynamic Interrupt Moderation */
u8 tired; u8 tired;
}; };
/**
* enum dim_cq_period_mode
*
* These are the modes for CQ period count.
*
* @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
* @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
* @DIM_CQ_PERIOD_NUM_MODES: Number of modes
*/
enum { enum {
DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
DIM_CQ_PERIOD_NUM_MODES DIM_CQ_PERIOD_NUM_MODES
}; };
/**
* enum dim_state
*
* These are the DIM algorithm states.
* These will determine if the algorithm is in a valid state to start an iteration.
*
* @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
* @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check if
* need to perform an action
* @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
*/
enum { enum {
DIM_START_MEASURE, DIM_START_MEASURE,
DIM_MEASURE_IN_PROGRESS, DIM_MEASURE_IN_PROGRESS,
DIM_APPLY_NEW_PROFILE, DIM_APPLY_NEW_PROFILE,
}; };
/**
* enum dim_tune_state
*
* These are the DIM algorithm tune states.
* These will determine which action the algorithm should perform.
*
* @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
* @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0
* @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
* @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
*/
enum { enum {
DIM_PARKING_ON_TOP, DIM_PARKING_ON_TOP,
DIM_PARKING_TIRED, DIM_PARKING_TIRED,
@ -65,63 +148,95 @@ enum {
DIM_GOING_LEFT, DIM_GOING_LEFT,
}; };
/**
* enum dim_stats_state
*
* These are the DIM algorithm statistics states.
* These will determine the verdict of current iteration.
*
* @DIM_STATS_WORSE: Current iteration shows worse performance than before
* @DIM_STATS_WORSE: Current iteration shows same performance than before
* @DIM_STATS_WORSE: Current iteration shows better performance than before
*/
enum { enum {
DIM_STATS_WORSE, DIM_STATS_WORSE,
DIM_STATS_SAME, DIM_STATS_SAME,
DIM_STATS_BETTER, DIM_STATS_BETTER,
}; };
/**
* enum dim_step_result
*
* These are the DIM algorithm step results.
* These describe the result of a step.
*
* @DIM_STEPPED: Performed a regular step
* @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to
* tired parking
* @DIM_ON_EDGE: Stepped to the most left/right profile
*/
enum { enum {
DIM_STEPPED, DIM_STEPPED,
DIM_TOO_TIRED, DIM_TOO_TIRED,
DIM_ON_EDGE, DIM_ON_EDGE,
}; };
static inline bool dim_on_top(struct dim *dim) /**
{ * dim_on_top - check if current state is a good place to stop (top location)
switch (dim->tune_state) { * @dim: DIM context
case DIM_PARKING_ON_TOP: *
case DIM_PARKING_TIRED: * Check if current profile is a good place to park at.
return true; * This will result in reducing the DIM checks frequency as we assume we
case DIM_GOING_RIGHT: * shouldn't probably change profiles, unless traffic pattern wasn't changed.
return (dim->steps_left > 1) && (dim->steps_right == 1); */
default: /* DIM_GOING_LEFT */ bool dim_on_top(struct dim *dim);
return (dim->steps_right > 1) && (dim->steps_left == 1);
}
}
static inline void dim_turn(struct dim *dim) /**
{ * dim_turn - change profile alterning direction
switch (dim->tune_state) { * @dim: DIM context
case DIM_PARKING_ON_TOP: *
case DIM_PARKING_TIRED: * Go left if we were going right and vice-versa.
break; * Do nothing if currently parking.
case DIM_GOING_RIGHT: */
dim->tune_state = DIM_GOING_LEFT; void dim_turn(struct dim *dim);
dim->steps_left = 0;
break;
case DIM_GOING_LEFT:
dim->tune_state = DIM_GOING_RIGHT;
dim->steps_right = 0;
break;
}
}
static inline void dim_park_on_top(struct dim *dim) /**
{ * dim_park_on_top - enter a parking state on a top location
dim->steps_right = 0; * @dim: DIM context
dim->steps_left = 0; *
dim->tired = 0; * Enter parking state.
dim->tune_state = DIM_PARKING_ON_TOP; * Clear all movement history.
} */
void dim_park_on_top(struct dim *dim);
static inline void dim_park_tired(struct dim *dim) /**
{ * dim_park_tired - enter a tired parking state
dim->steps_right = 0; * @dim: DIM context
dim->steps_left = 0; *
dim->tune_state = DIM_PARKING_TIRED; * Enter parking state.
} * Clear all movement history and cause DIM checks frequency to reduce.
*/
void dim_park_tired(struct dim *dim);
/**
* dim_calc_stats - calculate the difference between two samples
* @start: start sample
* @end: end sample
* @curr_stats: delta between samples
*
* Calculate the delta between two samples (in data rates).
* Takes into consideration counter wrap-around.
*/
void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
struct dim_stats *curr_stats);
/**
* dim_update_sample - set a sample's fields with give values
* @event_ctr: number of events to set
* @packets: number of packets to set
* @bytes: number of bytes to set
* @s: DIM sample
*/
static inline void static inline void
dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s)
{ {
@ -131,23 +246,99 @@ dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s)
s->event_ctr = event_ctr; s->event_ctr = event_ctr;
} }
static inline void /* Net DIM */
dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
struct dim_stats *curr_stats)
{
/* u32 holds up to 71 minutes, should be enough */
u32 delta_us = ktime_us_delta(end->time, start->time);
u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
start->byte_ctr);
if (!delta_us) /*
return; * Net DIM profiles:
* There are different set of profiles for each CQ period mode.
* There are different set of profiles for RX/TX CQs.
* Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
*/
#define NET_DIM_PARAMS_NUM_PROFILES 5
#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
#define NET_DIM_DEF_PROFILE_CQE 1
#define NET_DIM_DEF_PROFILE_EQE 1
curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); #define NET_DIM_RX_EQE_PROFILES { \
curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
delta_us); {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
} }
#define NET_DIM_RX_CQE_PROFILES { \
{2, 256}, \
{8, 128}, \
{16, 64}, \
{32, 64}, \
{64, 64} \
}
#define NET_DIM_TX_EQE_PROFILES { \
{1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
}
#define NET_DIM_TX_CQE_PROFILES { \
{5, 128}, \
{8, 64}, \
{16, 32}, \
{32, 32}, \
{64, 32} \
}
static const struct dim_cq_moder
rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_RX_EQE_PROFILES,
NET_DIM_RX_CQE_PROFILES,
};
static const struct dim_cq_moder
tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_TX_EQE_PROFILES,
NET_DIM_TX_CQE_PROFILES,
};
/**
* net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile
* @cq_period_mode: CQ period mode
* @ix: Profile index
*/
struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix);
/**
* net_dim_get_def_rx_moderation - provide the default RX moderation
* @cq_period_mode: CQ period mode
*/
struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode);
/**
* net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile
* @cq_period_mode: CQ period mode
* @ix: Profile index
*/
struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix);
/**
* net_dim_get_def_tx_moderation - provide the default TX moderation
* @cq_period_mode: CQ period mode
*/
struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
/**
* net_dim - main DIM algorithm entry point
* @dim: DIM instance information
* @end_sample: Current data measurement
*
* Called by the consumer.
* This is the main logic of the algorithm, where data is processed in order to decide on next
* required action.
*/
void net_dim(struct dim *dim, struct dim_sample end_sample);
#endif /* DIM_H */ #endif /* DIM_H */

View File

@ -1,273 +0,0 @@
/*
* Copyright (c) 2016, Mellanox Technologies. All rights reserved.
* Copyright (c) 2017-2018, Broadcom Limited. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef NET_DIM_H
#define NET_DIM_H
#include <linux/module.h>
#include <linux/dim.h>
#define NET_DIM_PARAMS_NUM_PROFILES 5
/* Netdev dynamic interrupt moderation profiles */
#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
#define NET_DIM_DEF_PROFILE_CQE 1
#define NET_DIM_DEF_PROFILE_EQE 1
/* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */
#define NET_DIM_RX_EQE_PROFILES { \
{1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
}
#define NET_DIM_RX_CQE_PROFILES { \
{2, 256}, \
{8, 128}, \
{16, 64}, \
{32, 64}, \
{64, 64} \
}
#define NET_DIM_TX_EQE_PROFILES { \
{1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
}
#define NET_DIM_TX_CQE_PROFILES { \
{5, 128}, \
{8, 64}, \
{16, 32}, \
{32, 32}, \
{64, 32} \
}
static const struct dim_cq_moder
rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_RX_EQE_PROFILES,
NET_DIM_RX_CQE_PROFILES,
};
static const struct dim_cq_moder
tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_TX_EQE_PROFILES,
NET_DIM_TX_CQE_PROFILES,
};
static inline struct dim_cq_moder
net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
static inline struct dim_cq_moder
net_dim_get_def_rx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
}
static inline struct dim_cq_moder
net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
static inline struct dim_cq_moder
net_dim_get_def_tx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
}
static inline int net_dim_step(struct dim *dim)
{
if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
return DIM_TOO_TIRED;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
case DIM_PARKING_TIRED:
break;
case DIM_GOING_RIGHT:
if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
return DIM_ON_EDGE;
dim->profile_ix++;
dim->steps_right++;
break;
case DIM_GOING_LEFT:
if (dim->profile_ix == 0)
return DIM_ON_EDGE;
dim->profile_ix--;
dim->steps_left++;
break;
}
dim->tired++;
return DIM_STEPPED;
}
static inline void net_dim_exit_parking(struct dim *dim)
{
dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT :
DIM_GOING_RIGHT;
net_dim_step(dim);
}
static inline int net_dim_stats_compare(struct dim_stats *curr,
struct dim_stats *prev)
{
if (!prev->bpms)
return curr->bpms ? DIM_STATS_BETTER :
DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->ppms)
return curr->ppms ? DIM_STATS_BETTER :
DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->epms)
return DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
return DIM_STATS_SAME;
}
static inline bool net_dim_decision(struct dim_stats *curr_stats,
struct dim *dim)
{
int prev_state = dim->tune_state;
int prev_ix = dim->profile_ix;
int stats_res;
int step_res;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
if (stats_res != DIM_STATS_SAME)
net_dim_exit_parking(dim);
break;
case DIM_PARKING_TIRED:
dim->tired--;
if (!dim->tired)
net_dim_exit_parking(dim);
break;
case DIM_GOING_RIGHT:
case DIM_GOING_LEFT:
stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
if (stats_res != DIM_STATS_BETTER)
dim_turn(dim);
if (dim_on_top(dim)) {
dim_park_on_top(dim);
break;
}
step_res = net_dim_step(dim);
switch (step_res) {
case DIM_ON_EDGE:
dim_park_on_top(dim);
break;
case DIM_TOO_TIRED:
dim_park_tired(dim);
break;
}
break;
}
if (prev_state != DIM_PARKING_ON_TOP ||
dim->tune_state != DIM_PARKING_ON_TOP)
dim->prev_stats = *curr_stats;
return dim->profile_ix != prev_ix;
}
static inline void net_dim(struct dim *dim,
struct dim_sample end_sample)
{
struct dim_stats curr_stats;
u16 nevents;
switch (dim->state) {
case DIM_MEASURE_IN_PROGRESS:
nevents = BIT_GAP(BITS_PER_TYPE(u16),
end_sample.event_ctr,
dim->start_sample.event_ctr);
if (nevents < DIM_NEVENTS)
break;
dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats);
if (net_dim_decision(&curr_stats, dim)) {
dim->state = DIM_APPLY_NEW_PROFILE;
schedule_work(&dim->work);
break;
}
/* fall through */
case DIM_START_MEASURE:
dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
end_sample.byte_ctr, &dim->start_sample);
dim->state = DIM_MEASURE_IN_PROGRESS;
break;
case DIM_APPLY_NEW_PROFILE:
break;
}
}
#endif /* NET_DIM_H */

View File

@ -562,6 +562,14 @@ config SIGNATURE
Digital signature verification. Currently only RSA is supported. Digital signature verification. Currently only RSA is supported.
Implementation is done using GnuPG MPI library Implementation is done using GnuPG MPI library
config DIMLIB
bool "DIM library"
default y
help
Dynamic Interrupt Moderation library.
Implements an algorithm for dynamically change CQ modertion values
according to run time performance.
# #
# libfdt files, only selected if needed. # libfdt files, only selected if needed.
# #

View File

@ -202,6 +202,7 @@ obj-$(CONFIG_GLOB) += glob.o
obj-$(CONFIG_GLOB_SELFTEST) += globtest.o obj-$(CONFIG_GLOB_SELFTEST) += globtest.o
obj-$(CONFIG_MPILIB) += mpi/ obj-$(CONFIG_MPILIB) += mpi/
obj-$(CONFIG_DIMLIB) += dim/
obj-$(CONFIG_SIGNATURE) += digsig.o obj-$(CONFIG_SIGNATURE) += digsig.o
lib-$(CONFIG_CLZ_TAB) += clz_tab.o lib-$(CONFIG_CLZ_TAB) += clz_tab.o

9
lib/dim/Makefile Normal file
View File

@ -0,0 +1,9 @@
#
# DIM Dynamic Interrupt Moderation library
#
obj-$(CONFIG_DIMLIB) = net_dim.o
net_dim-y = \
dim.o \
net_dim.o

74
lib/dim/dim.c Normal file
View File

@ -0,0 +1,74 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/dim.h>
bool dim_on_top(struct dim *dim)
{
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
case DIM_PARKING_TIRED:
return true;
case DIM_GOING_RIGHT:
return (dim->steps_left > 1) && (dim->steps_right == 1);
default: /* DIM_GOING_LEFT */
return (dim->steps_right > 1) && (dim->steps_left == 1);
}
}
EXPORT_SYMBOL(dim_on_top);
void dim_turn(struct dim *dim)
{
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
case DIM_PARKING_TIRED:
break;
case DIM_GOING_RIGHT:
dim->tune_state = DIM_GOING_LEFT;
dim->steps_left = 0;
break;
case DIM_GOING_LEFT:
dim->tune_state = DIM_GOING_RIGHT;
dim->steps_right = 0;
break;
}
}
EXPORT_SYMBOL(dim_turn);
void dim_park_on_top(struct dim *dim)
{
dim->steps_right = 0;
dim->steps_left = 0;
dim->tired = 0;
dim->tune_state = DIM_PARKING_ON_TOP;
}
EXPORT_SYMBOL(dim_park_on_top);
void dim_park_tired(struct dim *dim)
{
dim->steps_right = 0;
dim->steps_left = 0;
dim->tune_state = DIM_PARKING_TIRED;
}
EXPORT_SYMBOL(dim_park_tired);
void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
struct dim_stats *curr_stats)
{
/* u32 holds up to 71 minutes, should be enough */
u32 delta_us = ktime_us_delta(end->time, start->time);
u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
start->byte_ctr);
if (!delta_us)
return;
curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC,
delta_us);
}
EXPORT_SYMBOL(dim_calc_stats);

190
lib/dim/net_dim.c Normal file
View File

@ -0,0 +1,190 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/dim.h>
struct dim_cq_moder
net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
EXPORT_SYMBOL(net_dim_get_rx_moderation);
struct dim_cq_moder
net_dim_get_def_rx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
}
EXPORT_SYMBOL(net_dim_get_def_rx_moderation);
struct dim_cq_moder
net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
EXPORT_SYMBOL(net_dim_get_tx_moderation);
struct dim_cq_moder
net_dim_get_def_tx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
}
EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
static int net_dim_step(struct dim *dim)
{
if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
return DIM_TOO_TIRED;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
case DIM_PARKING_TIRED:
break;
case DIM_GOING_RIGHT:
if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
return DIM_ON_EDGE;
dim->profile_ix++;
dim->steps_right++;
break;
case DIM_GOING_LEFT:
if (dim->profile_ix == 0)
return DIM_ON_EDGE;
dim->profile_ix--;
dim->steps_left++;
break;
}
dim->tired++;
return DIM_STEPPED;
}
static void net_dim_exit_parking(struct dim *dim)
{
dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT;
net_dim_step(dim);
}
static int net_dim_stats_compare(struct dim_stats *curr,
struct dim_stats *prev)
{
if (!prev->bpms)
return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->ppms)
return curr->ppms ? DIM_STATS_BETTER :
DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->epms)
return DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
return DIM_STATS_SAME;
}
static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
{
int prev_state = dim->tune_state;
int prev_ix = dim->profile_ix;
int stats_res;
int step_res;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
stats_res = net_dim_stats_compare(curr_stats,
&dim->prev_stats);
if (stats_res != DIM_STATS_SAME)
net_dim_exit_parking(dim);
break;
case DIM_PARKING_TIRED:
dim->tired--;
if (!dim->tired)
net_dim_exit_parking(dim);
break;
case DIM_GOING_RIGHT:
case DIM_GOING_LEFT:
stats_res = net_dim_stats_compare(curr_stats,
&dim->prev_stats);
if (stats_res != DIM_STATS_BETTER)
dim_turn(dim);
if (dim_on_top(dim)) {
dim_park_on_top(dim);
break;
}
step_res = net_dim_step(dim);
switch (step_res) {
case DIM_ON_EDGE:
dim_park_on_top(dim);
break;
case DIM_TOO_TIRED:
dim_park_tired(dim);
break;
}
break;
}
if (prev_state != DIM_PARKING_ON_TOP ||
dim->tune_state != DIM_PARKING_ON_TOP)
dim->prev_stats = *curr_stats;
return dim->profile_ix != prev_ix;
}
void net_dim(struct dim *dim, struct dim_sample end_sample)
{
struct dim_stats curr_stats;
u16 nevents;
switch (dim->state) {
case DIM_MEASURE_IN_PROGRESS:
nevents = BIT_GAP(BITS_PER_TYPE(u16),
end_sample.event_ctr,
dim->start_sample.event_ctr);
if (nevents < DIM_NEVENTS)
break;
dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats);
if (net_dim_decision(&curr_stats, dim)) {
dim->state = DIM_APPLY_NEW_PROFILE;
schedule_work(&dim->work);
break;
}
/* fall through */
case DIM_START_MEASURE:
dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
end_sample.byte_ctr, &dim->start_sample);
dim->state = DIM_MEASURE_IN_PROGRESS;
break;
case DIM_APPLY_NEW_PROFILE:
break;
}
}
EXPORT_SYMBOL(net_dim);