Merge branch 'selftests-mptcp-mark-unstable-subtests-as-flaky'

Matthieu Baerts says:

====================
selftests: mptcp: mark unstable subtests as flaky

Some subtests can be unstable, failing once every X runs. Fixing them
can take time: there could be an issue in the kernel or in the subtest,
and it is then important to do a proper analysis, not to hide real bugs.

To avoid creating noises on the different CIs where tests are more
unstable than on our side, some subtests have been marked as flaky. As a
result, errors with these subtests (if any) are ignored.

Note that the MPTCP CI will continue to track these flaky subtests. All
these unstable subtests are also tracked by our bug tracker.

These are fixes for the -net tree, because the instabilities are visible
there. The first patch introducing the flake support has no 'Fixes'
tags, mainly because it requires recent and important refactoring done
in all MPTCP selftests. Backporting that to old versions where the flaky
tests have been introduced would be too difficult, and probably not
worth it. The other patches, adding MPTCP_LIB_SUBTEST_FLAKY=1, have a
Fixes tag, simply to ease the backport of the future fixes removing them
along with the proper fix.
====================

Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-0-a352362f3f8e@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-05-27 17:13:01 -07:00
commit 6e15774d92
3 changed files with 40 additions and 6 deletions

View file

@ -261,6 +261,8 @@ reset()
TEST_NAME="${1}"
MPTCP_LIB_SUBTEST_FLAKY=0 # reset if modified
if skip_test; then
MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
last_test_ignored=1
@ -448,7 +450,9 @@ reset_with_tcp_filter()
# $1: err msg
fail_test()
{
ret=${KSFT_FAIL}
if ! mptcp_lib_subtest_is_flaky; then
ret=${KSFT_FAIL}
fi
if [ ${#} -gt 0 ]; then
print_fail "${@}"
@ -3069,6 +3073,7 @@ fullmesh_tests()
fastclose_tests()
{
if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=client \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
@ -3077,6 +3082,7 @@ fastclose_tests()
fi
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0 0 0 0 1
@ -3095,6 +3101,7 @@ fail_tests()
{
# single subflow
if reset_with_fail "Infinite map" 1; then
MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=128 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
@ -3103,6 +3110,7 @@ fail_tests()
# multiple subflows
if reset_with_fail "MP_FAIL MP_RST" 2; then
MPTCP_LIB_SUBTEST_FLAKY=1
tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1

View file

@ -21,6 +21,7 @@ declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
MPTCP_LIB_SUBTEST_FLAKY=0
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
MPTCP_LIB_IP_MPTCP=0
@ -41,6 +42,16 @@ else
readonly MPTCP_LIB_COLOR_RESET=
fi
# SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var can be set not to ignore errors
# from subtests marked as flaky
mptcp_lib_override_flaky() {
[ "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" = 1 ]
}
mptcp_lib_subtest_is_flaky() {
[ "${MPTCP_LIB_SUBTEST_FLAKY}" = 1 ] && ! mptcp_lib_override_flaky
}
# $1: color, $2: text
mptcp_lib_print_color() {
echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
@ -72,7 +83,16 @@ mptcp_lib_pr_skip() {
}
mptcp_lib_pr_fail() {
mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
local title cmt
if mptcp_lib_subtest_is_flaky; then
title="IGNO"
cmt=" (flaky)"
else
title="FAIL"
fi
mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}"
}
mptcp_lib_pr_info() {
@ -208,7 +228,13 @@ mptcp_lib_result_pass() {
# $1: test name
mptcp_lib_result_fail() {
__mptcp_lib_result_add "not ok" "${1}"
if mptcp_lib_subtest_is_flaky; then
# It might sound better to use 'not ok # TODO' or 'ok # SKIP',
# but some CIs don't understand 'TODO' and treat SKIP as errors.
__mptcp_lib_result_add "ok" "${1} # IGNORE Flaky"
else
__mptcp_lib_result_add "not ok" "${1}"
fi
}
# $1: test name

View file

@ -244,7 +244,7 @@ run_test()
do_transfer $small $large $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
if [ $lret -ne 0 ]; then
if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
ret=$lret
[ $bail -eq 0 ] || exit $ret
fi
@ -254,7 +254,7 @@ run_test()
do_transfer $large $small $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
if [ $lret -ne 0 ]; then
if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
ret=$lret
[ $bail -eq 0 ] || exit $ret
fi
@ -290,7 +290,7 @@ run_test 10 10 0 0 "balanced bwidth"
run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
# we still need some additional infrastructure to pass the following test-cases
run_test 10 3 0 0 "unbalanced bwidth"
MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth"
run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"