rsyslog/tests/diag.sh
Rainer Gerhards 5b3b1b0f63 testbench: ensure that test requiring journalctl work
and do not fail just because journalctl is missing on the system.
The latter e.g. is the case for example on Ubuntu 14.04, which has
journal support inside the OS, but no journalctl.
2016-03-20 16:15:28 +01:00

483 lines
18 KiB
Bash
Executable File

#!/bin/bash
#
# this shell script provides commands to the common diag system. It enables
# test scripts to wait for certain conditions and initiate certain actions.
# needs support in config file.
# NOTE: this file should be included with ". diag.sh", as it otherwise is
# not always able to convey back states to the upper-level test driver
# begun 2009-05-27 by rgerhards
# This file is part of the rsyslog project, released under GPLv3
#
# This file can be customized to environment specifics via environment
# variables:
# RS_SORTCMD Sort command to use (must support -g option). If unset,
# "sort" is used. E.g. Solaris needs "gsort"
#
# environment variables:
# USE_AUTO_DEBUG "on" --> enables automatic debugging, anything else
# turns it off
# diag system internal environment variables
# these variables are for use by test scripts - they CANNOT be
# overriden by the user
# TCPFLOOD_EXTRA_OPTS enables to set extra options for tcpflood, usually
# used in tests that have a common driver where it
# is too hard to set these options otherwise
#valgrind="valgrind --malloc-fill=ff --free-fill=fe --log-fd=1"
# **** use the line below for very hard to find leaks! *****
#valgrind="valgrind --leak-check=full --show-leak-kinds=all --malloc-fill=ff --free-fill=fe --log-fd=1"
#valgrind="valgrind --tool=drd --log-fd=1"
#valgrind="valgrind --tool=helgrind --log-fd=1"
#valgrind="valgrind --tool=exp-ptrcheck --log-fd=1"
#set -o xtrace
#export RSYSLOG_DEBUG="debug nologfuncflow noprintmutexaction nostdout"
#export RSYSLOG_DEBUGLOG="log"
TB_TIMEOUT_STARTSTOP=3000 # timeout for start/stop rsyslogd in tenths (!) of a second 3000 => 5 min
case $1 in
'init') $srcdir/killrsyslog.sh # kill rsyslogd if it runs for some reason
if [ -z $RS_SORTCMD ]; then
RS_SORTCMD=sort
fi
if [ "x$2" != "x" ]; then
echo "------------------------------------------------------------"
echo "Test: $0"
echo "------------------------------------------------------------"
fi
cp $srcdir/testsuites/diag-common.conf diag-common.conf
cp $srcdir/testsuites/diag-common2.conf diag-common2.conf
rm -f rsyslogd.started work-*.conf rsyslog.random.data
rm -f rsyslogd2.started work-*.conf
rm -f work rsyslog.out.log rsyslog2.out.log rsyslog.out.log.save # common work files
rm -rf test-spool test-logdir stat-file1
rm -f rsyslog.out.*.log work-presort rsyslog.pipe
rm -f rsyslog.input rsyslog.empty
rm -f testconf.conf
rm -f rsyslog.errorfile
rm -f core.* vgcore.*
# Note: rsyslog.action.*.include must NOT be deleted, as it
# is used to setup some parameters BEFORE calling init. This
# happens in chained test scripts. Delete on exit is fine,
# though.
mkdir test-spool
ulimit -c 4000000000
# note: TCPFLOOD_EXTRA_OPTS MUST NOT be unset in init, because
# some tests need to set it BEFORE calling init to accomodate
# their generic test drivers.
if [ "$TCPFLOOD_EXTRA_OPTS" != '' ] ; then
echo TCPFLOOD_EXTRA_OPTS set: $TCPFLOOD_EXTRA_OPTS
fi
if [ "$USE_AUTO_DEBUG" != 'on' ] ; then
rm -f IN_AUTO_DEBUG
fi
if [ -e IN_AUTO_DEBUG ]; then
export valgrind="valgrind --malloc-fill=ff --free-fill=fe --log-fd=1"
fi
;;
'exit') rm -f rsyslogd.started work-*.conf diag-common.conf
rm -f rsyslogd2.started diag-common2.conf rsyslog.action.*.include
rm -f work rsyslog.out.log rsyslog2.out.log rsyslog.out.log.save # common work files
rm -rf test-spool test-logdir stat-file1
rm -f rsyslog.out.*.log rsyslog.random.data work-presort rsyslog.pipe
rm -f rsyslog.input rsyslog.conf.tlscert stat-file1 rsyslog.empty
rm -f testconf.conf
rm -f rsyslog.errorfile
rm -f HOSTNAME imfile-state:.-rsyslog.input
unset TCPFLOOD_EXTRA_OPTS
echo -------------------------------------------------------------------------------
;;
'es-init') # initialize local Elasticsearch *testbench* instance for the next
# test. NOTE: do NOT put anything useful on that instance!
curl -XDELETE localhost:9200/rsyslog_testbench
;;
'es-getdata') # read data from ES to a local file so that we can process
# it with out regular tooling.
# Note: param 2 MUST be number of records to read (ES does
# not return the full set unless you tell it explicitely).
curl localhost:9200/rsyslog_testbench/_search?size=$2 > work
python $srcdir/es_response_get_msgnum.py > rsyslog.out.log
;;
'startup') # start rsyslogd with default params. $2 is the config file name to use
# returns only after successful startup, $3 is the instance (blank or 2!)
if [ "x$2" == "x" ]; then
CONF_FILE="testconf.conf"
else
CONF_FILE="$srcdir/testsuites/$2"
fi
if [ ! -f $CONF_FILE ]; then
echo "ERROR: config file '$CONF_FILE' not found!"
exit 1
fi
$valgrind ../tools/rsyslogd -C -n -irsyslog$3.pid -M../runtime/.libs:../.libs -f$CONF_FILE &
. $srcdir/diag.sh wait-startup $3
;;
'startup-silent') # start rsyslogd with default params. $2 is the config file name to use
# returns only after successful startup, $3 is the instance (blank or 2!)
if [ ! -f $srcdir/testsuites/$2 ]; then
echo "ERROR: config file '$srcdir/testsuites/$2' not found!"
exit 1
fi
$valgrind ../tools/rsyslogd -C -n -irsyslog$3.pid -M../runtime/.libs:../.libs -f$srcdir/testsuites/$2 2>/dev/null &
. $srcdir/diag.sh wait-startup $3
;;
'startup-vg') # start rsyslogd with default params under valgrind control. $2 is the config file name to use
# returns only after successful startup, $3 is the instance (blank or 2!)
if [ "x$2" == "x" ]; then
CONF_FILE="testconf.conf"
else
CONF_FILE="$srcdir/testsuites/$2"
fi
if [ ! -f $CONF_FILE ]; then
echo "ERROR: config file '$CONF_FILE' not found!"
exit 1
fi
valgrind $RS_TESTBENCH_VALGRIND_EXTRA_OPTS --log-fd=1 --error-exitcode=10 --malloc-fill=ff --free-fill=fe --leak-check=full ../tools/rsyslogd -C -n -irsyslog$3.pid -M../runtime/.libs:../.libs -f$CONF_FILE &
. $srcdir/diag.sh wait-startup $3
echo startup-vg still running
;;
'startup-vg-noleak') # same as startup-vg, except that --leak-check is set to "none". This
# is meant to be used in cases where we have to deal with libraries (and such
# that) we don't can influence and where we cannot provide suppressions as
# they are platform-dependent. In that case, we can't test for leak checks
# (obviously), but we can check for access violations, what still is useful.
if [ ! -f $srcdir/testsuites/$2 ]; then
echo "ERROR: config file '$srcdir/testsuites/$2' not found!"
exit 1
fi
valgrind $RS_TESTBENCH_VALGRIND_EXTRA_OPTS --log-fd=1 --error-exitcode=10 --malloc-fill=ff --free-fill=fe --leak-check=no ../tools/rsyslogd -C -n -irsyslog$3.pid -M../runtime/.libs:../.libs -f$srcdir/testsuites/$2 &
. $srcdir/diag.sh wait-startup $3
echo startup-vg still running
;;
'msleep')
$srcdir/msleep $2
;;
'wait-startup') # wait for rsyslogd startup ($2 is the instance)
i=0
while test ! -f rsyslog$2.pid; do
./msleep 100 # wait 100 milliseconds
let "i++"
if test $i -gt $TB_TIMEOUT_STARTSTOP
then
echo "ABORT! Timeout waiting on startup (pid file)"
. $srcdir/diag.sh error-exit 1
fi
done
i=0
while test ! -f rsyslogd$2.started; do
./msleep 100 # wait 100 milliseconds
let "i++"
if test $i -gt $TB_TIMEOUT_STARTSTOP
then
echo "ABORT! Timeout waiting on startup ('started' file)"
. $srcdir/diag.sh error-exit 1
fi
done
echo "rsyslogd$2 started with pid " `cat rsyslog$2.pid`
;;
'wait-shutdown') # actually, we wait for rsyslog.pid to be deleted. $2 is the
# instance
i=0
while test -f rsyslog$2.pid; do
./msleep 100 # wait 100 milliseconds
let "i++"
if test $i -gt $TB_TIMEOUT_STARTSTOP
then
echo "ABORT! Timeout waiting on shutdown"
echo "Instance is possibly still running and may need"
echo "manual cleanup."
exit 1
fi
done
if [ -e core.* ]
then
echo "ABORT! core file exists"
. $srcdir/diag.sh error-exit 1
fi
;;
'wait-shutdown-vg') # actually, we wait for rsyslog.pid to be deleted. $2 is the
# instance
wait `cat rsyslog.pid`
export RSYSLOGD_EXIT=$?
echo rsyslogd run exited with $RSYSLOGD_EXIT
if [ -e vgcore.* ]
then
echo "ABORT! core file exists"
. $srcdir/diag.sh error-exit 1
fi
;;
'check-exit-vg') # wait for main message queue to be empty. $2 is the instance.
if [ "$RSYSLOGD_EXIT" -eq "10" ]
then
echo "valgrind run FAILED with exceptions - terminating"
exit 1
fi
;;
'get-mainqueuesize') # show the current main queue size
if [ "$2" == "2" ]
then
echo getmainmsgqueuesize | ./diagtalker -p13501 || . $srcdir/diag.sh error-exit $?
else
echo getmainmsgqueuesize | ./diagtalker || . $srcdir/diag.sh error-exit $?
fi
;;
'wait-queueempty') # wait for main message queue to be empty. $2 is the instance.
if [ "$2" == "2" ]
then
echo WaitMainQueueEmpty | ./diagtalker -p13501 || . $srcdir/diag.sh error-exit $?
else
echo WaitMainQueueEmpty | ./diagtalker || . $srcdir/diag.sh error-exit $?
fi
;;
'await-lookup-table-reload') # wait for all pending lookup table reloads to complete $2 is the instance.
if [ "$2" == "2" ]
then
echo AwaitLookupTableReload | ./diagtalker -p13501 || . $srcdir/diag.sh error-exit $?
else
echo AwaitLookupTableReload | ./diagtalker || . $srcdir/diag.sh error-exit $?
fi
;;
'issue-HUP') # shut rsyslogd down when main queue is empty. $2 is the instance.
kill -HUP `cat rsyslog$2.pid`
./msleep 1000
;;
'shutdown-when-empty') # shut rsyslogd down when main queue is empty. $2 is the instance.
if [ "$2" == "2" ]
then
echo Shutting down instance 2
fi
. $srcdir/diag.sh wait-queueempty $2
./msleep 1000 # wait a bit (think about slow testbench machines!)
kill `cat rsyslog$2.pid`
# note: we do not wait for the actual termination!
;;
'shutdown-immediate') # shut rsyslogd down without emptying the queue. $2 is the instance.
kill `cat rsyslog.pid`
# note: we do not wait for the actual termination!
;;
'tcpflood') # do a tcpflood run and check if it worked params are passed to tcpflood
shift 1
eval ./tcpflood $* $TCPFLOOD_EXTRA_OPTS
if [ "$?" -ne "0" ]; then
echo "error during tcpflood! see rsyslog.out.log.save for what was written"
cp rsyslog.out.log rsyslog.out.log.save
. $srcdir/diag.sh error-exit 1
fi
;;
'injectmsg') # inject messages via our inject interface (imdiag)
echo injecting $3 messages
echo injectmsg $2 $3 $4 $5 | ./diagtalker || . $srcdir/diag.sh error-exit $?
# TODO: some return state checking? (does it really make sense here?)
;;
'injectmsg-litteral') # inject litteral-payload via our inject interface (imdiag)
echo injecting msg payload from: $2
cat $2 | sed -e 's/^/injectmsg litteral /g' | ./diagtalker || . $srcdir/diag.sh error-exit $?
# TODO: some return state checking? (does it really make sense here?)
;;
'check-mainq-spool') # check if mainqueue spool files exist, if not abort (we just check .qi).
echo There must exist some files now:
ls -l test-spool
if test ! -f test-spool/mainq.qi; then
echo "error: mainq.qi does not exist where expected to do so!"
ls -l test-spool
. $srcdir/diag.sh error-exit 1
fi
;;
'seq-check') # do the usual sequence check to see if everything was properly received. $2 is the instance.
rm -f work
cp rsyslog.out.log work-presort
$RS_SORTCMD -g < rsyslog.out.log > work
# $4... are just to have the abilit to pass in more options...
# add -v to chkseq if you need more verbose output
./chkseq -fwork -s$2 -e$3 $4 $5 $6 $7
if [ "$?" -ne "0" ]; then
echo "sequence error detected"
. $srcdir/diag.sh error-exit 1
fi
;;
'seq-check2') # do the usual sequence check to see if everything was properly received. This is
# a duplicateof seq-check, but we could not change its calling conventions without
# breaking a lot of exitings test cases, so we preferred to duplicate the code here.
rm -f work2
$RS_SORTCMD -g < rsyslog2.out.log > work2
# $4... are just to have the abilit to pass in more options...
# add -v to chkseq if you need more verbose output
./chkseq -fwork2 -s$2 -e$3 $4 $5 $6 $7
if [ "$?" -ne "0" ]; then
echo "sequence error detected"
. $srcdir/diag.sh error-exit 1
fi
rm -f work2
;;
'content-check')
cat rsyslog.out.log | grep -qF "$2"
if [ "$?" -ne "0" ]; then
echo content-check failed to find "'$2'", content is
cat rsyslog.out.log
. $srcdir/diag.sh error-exit 1
fi
;;
'content-check-with-count')
count=$(cat rsyslog.out.log | grep -qF "$2" | wc -l)
if [ "x$count" == "x$3" ]; then
echo content-check failed, expected $2 to occure $3 times, but found it $count times
. $srcdir/diag.sh error-exit 1
fi
;;
'wait-for-stats-flush')
echo "will wait for stats push"
while [[ ! -f $2 ]]; do
echo waiting for stats file "'$2'" to be created
./msleep 100
done
prev_count=$(cat $2 | grep 'BEGIN$' | wc -l)
new_count=$prev_count
while [[ "x$prev_count" == "x$new_count" ]]; do
new_count=$(cat $2 | grep 'BEGIN$' | wc -l) # busy spin, because it allows as close timing-coordination in actual test run as possible
done
echo "stats push registered"
;;
'wait-for-dyn-stats-reset')
echo "will wait for dyn-stats-reset"
while [[ ! -f $2 ]]; do
echo waiting for stats file "'$2'" to be created
./msleep 100
done
prev_purged=$(cat $2 | grep -F 'origin=dynstats' | grep -F "${3}.purge_triggered=" | sed -e 's/.\+.purge_triggered=//g' | awk '{s+=$1} END {print s}')
new_purged=$prev_purged
while [[ "x$prev_purged" == "x$new_purged" ]]; do
new_purged=$(cat $2 | grep -F 'origin=dynstats' | grep -F "${3}.purge_triggered=" | sed -e 's/.\+\.purge_triggered=//g' | awk '{s+=$1} END {print s}') # busy spin, because it allows as close timing-coordination in actual test run as possible
./msleep 10
done
echo "dyn-stats reset for bucket ${3} registered"
;;
'custom-content-check')
cat $3 | grep -qF "$2"
if [ "$?" -ne "0" ]; then
echo content-check failed to find "'$2'" inside "'$3'"
. $srcdir/diag.sh error-exit 1
fi
;;
'first-column-sum-check')
sum=$(cat $4 | grep $3 | sed -e $2 | awk '{s+=$1} END {print s}')
if [ "x${sum}" != "x$5" ]; then
echo sum of first column with edit-expr "'$2'" run over lines from file "'$4'" matched by "'$3'" equals "'$sum'" which is not equal to expected value of "'$5'"
. $srcdir/diag.sh error-exit 1
fi
;;
'assert-first-column-sum-greater-than')
sum=$(cat $4 | grep $3 | sed -e $2 | awk '{s+=$1} END {print s}')
if [ ! $sum -gt $5 ]; then
echo sum of first column with edit-expr "'$2'" run over lines from file "'$4'" matched by "'$3'" equals "'$sum'" which is smaller than expected lower-limit of "'$5'"
. $srcdir/diag.sh error-exit 1
fi
;;
'content-pattern-check')
cat rsyslog.out.log | grep -q "$2"
if [ "$?" -ne "0" ]; then
echo content-check failed, not every line matched pattern "'$2'"
. $srcdir/diag.sh error-exit 1
fi
;;
'assert-content-missing')
cat rsyslog.out.log | grep -qF "$2"
if [ "$?" -eq "0" ]; then
echo content-missing assertion failed, some line matched pattern "'$2'"
. $srcdir/diag.sh error-exit 1
fi
;;
'custom-assert-content-missing')
cat $3 | grep -qF "$2"
if [ "$?" -eq "0" ]; then
echo content-missing assertion failed, some line in "'$3'" matched pattern "'$2'"
. $srcdir/diag.sh error-exit 1
fi
;;
'gzip-seq-check') # do the usual sequence check, but for gzip files
rm -f work
ls -l rsyslog.out.log
gunzip < rsyslog.out.log | $RS_SORTCMD -g > work
ls -l work
# $4... are just to have the abilit to pass in more options...
./chkseq -fwork -v -s$2 -e$3 $4 $5 $6 $7
if [ "$?" -ne "0" ]; then
echo "sequence error detected"
. $srcdir/diag.sh error-exit 1
fi
;;
'nettester') # perform nettester-based tests
# use -v for verbose output!
./nettester -t$2 -i$3 $4
if [ "$?" -ne "0" ]; then
. $srcdir/diag.sh error-exit 1
fi
;;
'setzcat') # find out name of zcat tool
if [ `uname` == SunOS ]; then
ZCAT=gzcat
else
ZCAT=zcat
fi
;;
'generate-HOSTNAME') # generate the HOSTNAME file
. $srcdir/diag.sh startup gethostname.conf
. $srcdir/diag.sh tcpflood -m1 -M "\"<128>\""
./msleep 100
. $srcdir/diag.sh shutdown-when-empty # shut down rsyslogd when done processing messages
. $srcdir/diag.sh wait-shutdown # we need to wait until rsyslogd is finished!
;;
'generate-conf') # start a standard test rsyslog.conf
echo "\$IncludeConfig diag-common.conf" > testconf.conf
;;
'add-conf') # start a standard test rsyslog.conf
echo "$2" >> testconf.conf
;;
'require-journalctl') # check if journalctl exists on the system
if ! hash journalctl 2>/dev/null ; then
echo "journalctl command missing, skipping test"
exit 77
fi
;;
'error-exit') # this is called if we had an error and need to abort. Here, we
# try to gather as much information as possible. That's most important
# for systems like Travis-CI where we cannot debug on the machine itself.
# our $2 is the to-be-used exit code.
if [[ ! -e IN_AUTO_DEBUG && "$USE_AUTO_DEBUG" == 'on' ]]; then
touch IN_AUTO_DEBUG
if [ -e core* ]
then
echo trying to analyze core for main rsyslogd binary
echo note: this may not be the correct file, check it
CORE=`ls core*`
echo "set pagination off" >gdb.in
echo "core $CORE" >>gdb.in
echo "info thread" >> gdb.in
echo "thread apply all bt full" >> gdb.in
echo "q" >> gdb.in
gdb ../tools/rsyslogd < gdb.in
CORE=
rm gdb.in
fi
# OK, we have the testname and can re-run under valgrind
echo re-running under valgrind control
current_test="./$(basename $0)" # this path is probably wrong -- theinric
$current_test
# wait a little bit so that valgrind can finish
./msleep 4000
# next let's try us to get a debug log
RSYSLOG_DEBUG_SAVE=$RSYSLOG_DEBUG
export RSYSLOG_DEBUG="debug nologfuncflow noprintmutexaction"
$current_test
./msleep 4000
RSYSLOG_DEBUG=$RSYSLOG_DEBUG_SAVE
rm IN_AUTO_DEBUG
fi
exit $2
;;
*) echo "invalid argument" $1
esac