From fdaf77707629ba218f15cc876cd414f718e5a4f4 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Sun, 17 Nov 2019 11:33:01 -0800
Subject: [PATCH] gitlab-ci/deqp: detect and report flakes

If there are a small number of fails, re-run to determine if they are
flakes, and optionally (if `$FLAKES_CHANNEL` is configured) report the
flakes.

This way flakes don't interfere with developers working on other
drivers, but get logged so that the developers working on the flaking
driver can monitor the situation.

Signed-off-by: Rob Clark
Acked-by: Eric Engestrom
---
 .gitlab-ci.yml                   |  3 +-
 .gitlab-ci/container/arm_test.sh |  1 +
 .gitlab-ci/deqp-runner.sh        | 83 +++++++++++++++++++++++++++++++++++-----
 3 files changed, 77 insertions(+), 10 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e074f0ac587..6ba2cd5e327 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -461,7 +461,7 @@ scons-old-llvm:
     - tar -xf artifacts/install.tar
     - LD_LIBRARY_PATH=install/lib find install/lib -name "*.so" -print -exec ldd {} \;
   artifacts:
-    when: on_failure
+    when: always
     name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME"
     paths:
       - results/
@@ -555,6 +555,7 @@ arm64_a630_gles2:
     DEQP_SKIPS: deqp-freedreno-a630-skips.txt
     NIR_VALIDATE: 0
     DEQP_PARALLEL: 4
+    FLAKES_CHANNEL: "#freedreno-ci"
   tags:
     - mesa-cheza
   dependencies:
diff --git a/.gitlab-ci/container/arm_test.sh b/.gitlab-ci/container/arm_test.sh
index 2ea6327cb84..16dc0ddca9a 100644
--- a/.gitlab-ci/container/arm_test.sh
+++ b/.gitlab-ci/container/arm_test.sh
@@ -26,6 +26,7 @@ apt-get -y install \
       libvulkan-dev \
       libvulkan1 \
       meson \
+      netcat \
       pkg-config \
       procps \
       python \
diff --git a/.gitlab-ci/deqp-runner.sh b/.gitlab-ci/deqp-runner.sh
index 58aa7759b7a..989a222967e 100755
--- a/.gitlab-ci/deqp-runner.sh
+++ b/.gitlab-ci/deqp-runner.sh
@@ -57,15 +57,58 @@ fi
 
 set +e
 
-vulkan-cts-runner \
-    --deqp /deqp/modules/$DEQP_VER/deqp-$DEQP_VER \
-    --output $RESULTS/cts-runner-results.txt \
-    --caselist /tmp/case-list.txt \
-    --exclude-list $ARTIFACTS/$DEQP_SKIPS \
-    $XFAIL \
-    --job ${DEQP_PARALLEL:-1} \
-    -- \
-    "${DEQP_OPTIONS[@]}"
+# Run deqp-runner on the caselist file $1, writing its results to file $2.
+run_cts() {
+    caselist=$1
+    output=$2
+    deqp-runner \
+        --deqp /deqp/modules/$DEQP_VER/deqp-$DEQP_VER \
+        --output $output \
+        --caselist $caselist \
+        --exclude-list $ARTIFACTS/$DEQP_SKIPS \
+        $XFAIL \
+        --job ${DEQP_PARALLEL:-1} \
+        --allow-flakes true \
+        -- \
+        "${DEQP_OPTIONS[@]}"
+}
+
+# Post each line of the flakes file $1 to $FLAKES_CHANNEL over IRC; no-op if the channel is unset.
+report_flakes() {
+    if [ -z "$FLAKES_CHANNEL" ]; then
+        return 0
+    fi
+    flakes=$1
+    bot="$CI_RUNNER_DESCRIPTION-$CI_PIPELINE_ID"
+    channel="$FLAKES_CHANNEL"
+    (
+    echo NICK $bot
+    echo USER $bot unused unused :Gitlab CI Notifier
+    sleep 10
+    echo "JOIN $channel"
+    sleep 1
+    desc="Flakes detected in job: $CI_JOB_URL on $CI_RUNNER_DESCRIPTION"
+    if [ -n "$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME" ]; then
+        desc="$desc on branch $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME ($CI_MERGE_REQUEST_TITLE)"
+    fi
+    echo "PRIVMSG $channel :$desc"
+    while IFS= read -r flake; do
+        echo "PRIVMSG $channel :$flake"
+    done < "$flakes"
+    echo "PRIVMSG $channel :See $CI_JOB_URL/artifacts/browse/results/"
+    echo "QUIT"
+    ) | nc irc.freenode.net 6667 > /dev/null
+
+}
+
+# wrapper to suppress +x to avoid spamming the log
+quiet() {
+    set +x
+    "$@"
+    set -x
+}
+
+run_cts /tmp/case-list.txt $RESULTS/cts-runner-results.txt
 DEQP_EXITCODE=$?
 
 if [ $DEQP_EXITCODE -ne 0 ]; then
@@ -78,6 +121,28 @@ if [ $DEQP_EXITCODE -ne 0 ]; then
         grep -v ",ExpectedFail" > \
         $RESULTS/cts-runner-unexpected-results.txt
     head -n 50 $RESULTS/cts-runner-unexpected-results.txt
+
+    count=`cat $RESULTS/cts-runner-unexpected-results.txt | wc -l`
+
+    # Re-run fails to detect flakes.  But use a small threshold, if
+    # something was fundamentally broken, we don't want to re-run
+    # the entire caselist
+else
+    cat $RESULTS/cts-runner-results.txt | \
+        grep ",Flake" > \
+        $RESULTS/cts-runner-flakes.txt
+
+    count=`cat $RESULTS/cts-runner-flakes.txt | wc -l`
+    if [ $count -gt 0 ]; then
+        echo "Some flakes found (see cts-runner-flakes.txt in artifacts for full results):"
+        head -n 50 $RESULTS/cts-runner-flakes.txt
+
+        # Report the flakes to IRC channel for monitoring (if configured):
+        quiet report_flakes $RESULTS/cts-runner-flakes.txt
+    else
+        # no flakes, so clean-up:
+        rm $RESULTS/cts-runner-flakes.txt
+    fi
 fi
 
 exit $DEQP_EXITCODE
-- 
2.11.0