#!/bin/bash
#
# this shell script provides commands to the common diag system. It enables
# test scripts to wait for certain conditions and initiate certain actions.
# It requires support in the test's config file.
# NOTE: this file should be included with ". diag.sh", as it otherwise is
# not always able to convey back states to the upper-level test driver
# begun 2009-05-27 by rgerhards
# This file is part of the rsyslog project, released under GPLv3
#
# This file can be customized to environment specifics via environment
# variables:
# SUDO either blank, or the command to use for sudo (usually "sudo").
# This env var is sometimes used to increase the chance that we
# get extra information, e.g. when trying to analyze running
# processes. Bottom line: if sudo is possible and you are OK with
# the testbench utilizing this, use
# export SUDO=sudo
# to activate the extra functionality.
# RS_SORTCMD Sort command to use (must support $RS_SORT_NUMERIC_OPT option). If unset,
# "sort" is used. E.g. Solaris needs "gsort"
# RS_SORT_NUMERIC_OPT option to use for numerical sort. If unset, "-g" is used.
# RS_CMPCMD cmp command to use. If unset, "cmp" is used.
# E.g. Solaris needs "gcmp"
# RS_HEADCMD head command to use. If unset, "head" is used.
# E.g. Solaris needs "ghead"
# RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT
# global input shutdown timeout in milliseconds, default 60000 (60 sec).
# This should only be set explicitly if there is good need to do so,
# e.g. if a test needs to time out.
#
# environment variables:
# USE_AUTO_DEBUG "on" --> enables automatic debugging, anything else
# turns it off
# diag system internal environment variables
# these variables are for use by test scripts - they CANNOT be
# overridden by the user
# TCPFLOOD_EXTRA_OPTS permits setting extra options for tcpflood, usually
# used in tests that have a common driver where it
# is too hard to set these options otherwise
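# As a quick illustration of the customization hooks above, a manual test
# run on e.g. Solaris could be prepared roughly as follows (values are
# examples only, adjust them to the local system):
#   export srcdir=$(pwd)
#   export RS_SORTCMD=gsort
#   export RS_CMPCMD=gcmp
#   export RS_HEADCMD=ghead
#   export SUDO=sudo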
#valgrind="valgrind --malloc-fill=ff --free-fill=fe --log-fd=1"
# **** use the line below for very hard to find leaks! *****
#valgrind="valgrind --leak-check=full --show-leak-kinds=all --malloc-fill=ff --free-fill=fe --log-fd=1"
#valgrind="valgrind --tool=drd --log-fd=1"
#valgrind="valgrind --tool=helgrind --log-fd=1 --suppressions=$srcdir/linux_localtime_r.supp --gen-suppressions=all"
#valgrind="valgrind --tool=exp-ptrcheck --log-fd=1"
#set -o xtrace
#export RSYSLOG_DEBUG="debug nologfuncflow noprintmutexaction nostdout"
#export RSYSLOG_DEBUGLOG="log"
TB_TIMEOUT_STARTSTOP=400 # timeout for start/stop rsyslogd in tenths (!) of a second 400 => 40 sec
# note that 40 sec for the startup should be sufficient even on very slow machines. We changed this from 2 min on 2017-12-12.
export RSYSLOG_DEBUG_TIMEOUTS_TO_STDERR="on" # we want to know when we lose messages due to timeouts
if [ "$srcdir" == "" ]; then
printf "\$srcdir not set, for a manual build, do 'export srcdir=\$(pwd)'\n"
fi
if [ "$TESTTOOL_DIR" == "" ]; then
export TESTTOOL_DIR="$srcdir"
fi
# newer functionality is preferably introduced via bash functions
# rgerhards, 2018-07-03
function rsyslog_testbench_test_url_access() {
local missing_requirements=
if ! hash curl 2>/dev/null ; then
missing_requirements="'curl' is missing in PATH; Make sure you have cURL installed! Skipping test ..."
fi
if [ -n "${missing_requirements}" ]; then
echo ${missing_requirements}
exit 77
fi
local http_endpoint="$1"
if ! curl --fail --max-time 30 "${http_endpoint}" 1>/dev/null 2>&1; then
echo "HTTP endpoint '${http_endpoint}' is not reachable. Skipping test ..."
exit 77
else
echo "HTTP endpoint '${http_endpoint}' is reachable! Starting test ..."
fi
}
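# usage sketch (the endpoint below is just a hypothetical example): a test
# that depends on an external HTTP service can guard itself like this and
# will be skipped (exit 77) if curl is missing or the URL is unreachable:
#   rsyslog_testbench_test_url_access "http://localhost:9200"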
# function to skip a test on a specific platform
# $1 is what we check in uname, $2 (optional) is a reason message
function skip_platform() {
if [ "$(uname)" == "$1" ]; then
echo "uname $(uname)"
echo "test does not work under $1"
if [ "$2" != "" ]; then
echo "reason: $2"
fi
exit 77
fi
}
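# usage sketch (the reason text is a hypothetical example):
#   skip_platform "SunOS" "this test requires inotify"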
function setvar_RS_HOSTNAME() {
printf '### Obtaining HOSTNAME (prerequisite, not actual test) ###\n'
generate_conf
add_conf 'module(load="../plugins/imtcp/.libs/imtcp")
input(type="imtcp" port="'$TCPFLOOD_PORT'")
$template hostname,"%hostname%"
local0.* ./'${RSYSLOG_DYNNAME}'.HOSTNAME;hostname
'
rm -f "${RSYSLOG_DYNNAME}.HOSTNAME"
startup
tcpflood -m1 -M "\"<128>\""
shutdown_when_empty # shut down rsyslogd when done processing messages
wait_shutdown # we need to wait until rsyslogd is finished!
export RS_HOSTNAME="$(cat ${RSYSLOG_DYNNAME}.HOSTNAME)"
rm -f "${RSYSLOG_DYNNAME}.HOSTNAME"
echo HOSTNAME is: $RS_HOSTNAME
}
# begin a new testconfig
# 2018-09-07: Incremented inputs.timeout.shutdown to 60000 because kafka tests may not be
# finished under stress otherwise
# $1 is the instance id, if given
function generate_conf() {
if [ "$RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT" == "" ]; then
RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT="60000"
fi
export TCPFLOOD_PORT="$(get_free_port)"
if [ "$1" == "" ]; then
export TESTCONF_NM="${RSYSLOG_DYNNAME}_" # this basename is also used by instance 2!
export RSYSLOG_OUT_LOG="${RSYSLOG_DYNNAME}.out.log"
export RSYSLOG2_OUT_LOG="${RSYSLOG_DYNNAME}_2.out.log"
export RSYSLOG_PIDBASE="${RSYSLOG_DYNNAME}:" # also used by instance 2!
mkdir $RSYSLOG_DYNNAME.spool
fi
echo 'module(load="../plugins/imdiag/.libs/imdiag")
global(inputs.timeout.shutdown="'$RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT'")
$IMDiagListenPortFileName '$RSYSLOG_DYNNAME.imdiag$1.port'
$IMDiagServerRun 0
:syslogtag, contains, "rsyslogd" ./'${RSYSLOG_DYNNAME}$1'.started
###### end of testbench instrumentation part, test conf follows:' > ${TESTCONF_NM}$1.conf
}
# add more data to config file. Note: generate_conf must have been called
# $1 is config fragment, $2 the instance id, if given
function add_conf() {
printf '%s' "$1" >> ${TESTCONF_NM}$2.conf
}
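# usage sketch of the generate_conf/add_conf pair; the config fragment is a
# minimal hypothetical example, real tests add whatever they need:
#   generate_conf
#   add_conf '
#   action(type="omfile" file="'$RSYSLOG_OUT_LOG'")
#   '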
function rst_msleep() {
$TESTTOOL_DIR/msleep $1
}
# compare file to expected exact content
# $1 is file to compare
function cmp_exact() {
if [ "$1" == "" ]; then
printf "Testbench ERROR, cmp_exact() needs filename as \$1\n"
error_exit 1
fi
if [ "$EXPECTED" == "" ]; then
printf 'Testbench ERROR, cmp_exact() needs to have env var EXPECTED set!\n'
error_exit 1
fi
printf '%s\n' "$EXPECTED" | cmp - "$1"
if [ $? -ne 0 ]; then
echo "invalid response generated"
echo "################# $1 is:"
cat -n ${RSYSLOG_OUT_LOG}
echo "################# EXPECTED was:"
printf '%s\n' "$EXPECTED" | cat -n -
printf '\n#################### diff is:\n'
printf '%s\n' "$EXPECTED" | diff - "$1"
error_exit 1
fi;
}
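# usage sketch (the EXPECTED value is a hypothetical example):
#   export EXPECTED='msgnum:00000000:'
#   cmp_exact "$RSYSLOG_OUT_LOG"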
# code common to all startup...() functions
function startup_common() {
instance=
if [ "$1" == "2" ]; then
CONF_FILE="${TESTCONF_NM}2.conf"
instance=2
elif [ "$1" == "" -o "$1" == "1" ]; then
CONF_FILE="${TESTCONF_NM}.conf"
else
CONF_FILE="$srcdir/testsuites/$1"
instance=$2
fi
# we need to remove the imdiag port file as there are some
# tests that start multiple times. These may get the old port
# number if the file still exists AND timing is bad so that
# imdiag does not generate the port file quickly enough on
# startup.
rm -f $RSYSLOG_DYNNAME.imdiag$instance.port
if [ ! -f $CONF_FILE ]; then
echo "ERROR: config file '$CONF_FILE' not found!"
error_exit 1
fi
echo config $CONF_FILE is:
cat -n $CONF_FILE
}
# wait for appearance of a specific pid file, given as $1
function wait_startup_pid() {
if [ "$1" == "" ]; then
echo "FAIL: testbench bug: wait_startup_called without \$1"
error_exit 100
fi
i=0
start_timeout="$(date)"
while test ! -f $1.pid; do
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
let "i++"
if test $i -gt $TB_TIMEOUT_STARTSTOP
then
echo "ABORT! Timeout waiting on startup (pid file $1.pid)"
echo "Wait initiated $start_timeout, now $(date)"
ls -l $1.pid
ps -fp $(cat $1.pid)
error_exit 1
fi
done
echo "$1.pid found, pid $(cat $1.pid)"
}
# special version of wait_startup_pid() for rsyslog startup
function wait_rsyslog_startup_pid() {
wait_startup_pid $RSYSLOG_PIDBASE$1
}
# wait for startup of an arbitrary process
# $1 - pid file name
# $2 - startup file name (optional, only checked if given)
function wait_process_startup() {
wait_startup_pid $1
i=0
if [ "$2" != "" ]; then
while test ! -f "$2"; do
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
ps -p $(cat $1.pid) &> /dev/null
if [ $? -ne 0 ]
then
echo "ABORT! pid in $1 no longer active during startup!"
error_exit 1
fi
let "i++"
if test $i -gt $TB_TIMEOUT_STARTSTOP
then
echo "ABORT! Timeout waiting on file '$2'"
error_exit 1
fi
done
echo "$2 seen, associated pid " $(cat $1.pid)
fi
}
# wait for file $1 to exist AND be non-empty
# $1 : file to wait for
# $2 (optional): error message to show if timeout occurs
function wait_file_exists() {
i=0
while true; do
if [ -f $1 -a "$(cat $1 2> /dev/null)" != "" ]; then
break
fi
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
((i++))
if test $i -gt $TB_TIMEOUT_STARTSTOP; then
echo "ABORT! Timeout waiting for file $1"
ls -l $1
if [ "$2" != "" ]; then
echo "$2"
fi
error_exit 1
fi
done
}
# kafka special wait function: we wait for the output file in order
# to ensure Kafka/Zookeeper is actually ready to go. This is NOT
# a generic check function and must only be used with those kafka tests
# that actually need it.
function kafka_wait_group_coordinator() {
echo We are waiting for kafka/zookeeper to be ready to deliver messages
wait_file_exists $RSYSLOG_OUT_LOG "
Non-existence of $RSYSLOG_OUT_LOG can be caused
by a problem inside zookeeper. If debug output in the receiver is enabled, one
may see this message:
\"GroupCoordinator response error: Broker: Group coordinator not available\"
In this case you may want to do a web search and/or have a look at
https://github.com/edenhill/librdkafka/issues/799
The question, of course, is if there is nevertheless a problem in imkafka.
Usually, the wait we do inside the testbench is sufficient to handle all
Zookeeper/Kafka startup. So if the issue reoccurs, it is suggested to enable
debug output in the receiver and check for actual problems.
"
}
# check if kafka itself failed. $1 is the message file name.
function kafka_check_broken_broker() {
grep -q "Broker transport failure" < "$1"
if [ "$?" -eq "0" ]; then
echo environment-induced test error - kafka broker failed - skipping test
cat -n $1
exit 77
fi
}
# wait for rsyslogd startup ($1 is the instance)
function wait_startup() {
wait_rsyslog_startup_pid $1
i=0
while test ! -f ${RSYSLOG_DYNNAME}$1.started; do
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
ps -p $(cat $RSYSLOG_PIDBASE$1.pid) &> /dev/null
if [ $? -ne 0 ]
then
echo "ABORT! rsyslog pid no longer active during startup!"
error_exit 1 stacktrace
fi
let "i++"
if test $i -gt $TB_TIMEOUT_STARTSTOP
then
echo "ABORT! Timeout waiting on startup ('${RSYSLOG_DYNNAME}.started' file)"
error_exit 1
fi
done
echo "rsyslogd$1 startup msg seen, pid " $(cat $RSYSLOG_PIDBASE$1.pid)
wait_file_exists $RSYSLOG_DYNNAME.imdiag$1.port
eval export IMDIAG_PORT$1=$(cat $RSYSLOG_DYNNAME.imdiag$1.port)
eval PORT=$IMDIAG_PORT$1
echo "imdiag$1 port: $PORT"
if [ "$PORT" == "" ]; then
echo "TESTBENCH ERROR: imdiag port not found!"
ls -l $RSYSLOG_DYNNAME*
exit 100
fi
}
# start rsyslogd with default params. $1 is the config file name to use
# returns only after successful startup, $2 is the instance (blank or 2!)
# RS_REDIR may be set to redirect rsyslog output
# env var RSTB_DAEMONIZE == "YES" means rsyslogd shall daemonize itself;
# any other value or unset means it does not do that.
function startup() {
startup_common "$1" "$2"
if [ "$RSTB_DAEMONIZE" == "YES" ]; then
n_option=""
else
n_option="-n"
fi
eval LD_PRELOAD=$RSYSLOG_PRELOAD $valgrind ../tools/rsyslogd -C $n_option -i$RSYSLOG_PIDBASE$instance.pid -M../runtime/.libs:../.libs -f$CONF_FILE $RS_REDIR &
wait_startup $instance
}
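# usage sketch: the typical test skeleton built on top of startup() and the
# other helpers in this file (message count is a hypothetical example):
#   . $srcdir/diag.sh init
#   generate_conf
#   add_conf '...test-specific config...'
#   startup
#   injectmsg 0 1000
#   shutdown_when_empty
#   wait_shutdown
#   seq_check 0 999
#   exit_test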
# same as startup_vg, BUT we do NOT wait on the startup message!
function startup_vg_waitpid_only() {
startup_common "$1" "$2"
if [ "x$RS_TESTBENCH_LEAK_CHECK" == "x" ]; then
RS_TESTBENCH_LEAK_CHECK=full
fi
LD_PRELOAD=$RSYSLOG_PRELOAD valgrind $RS_TEST_VALGRIND_EXTRA_OPTS $RS_TESTBENCH_VALGRIND_EXTRA_OPTS --suppressions=$srcdir/known_issues.supp ${EXTRA_VALGRIND_SUPPRESSIONS:-} --gen-suppressions=all --log-fd=1 --error-exitcode=10 --malloc-fill=ff --free-fill=fe --leak-check=$RS_TESTBENCH_LEAK_CHECK ../tools/rsyslogd -C -n -i$RSYSLOG_PIDBASE$2.pid -M../runtime/.libs:../.libs -f$CONF_FILE &
wait_rsyslog_startup_pid $2
}
# start rsyslogd with default params under valgrind control. $1 is the config file name to use
# returns only after successful startup, $2 is the instance (blank or 2!)
function startup_vg() {
startup_vg_waitpid_only $1 $2
wait_startup $2
echo startup_vg still running
}
# same as startup_vg, except that --leak-check is set to "no". This
# is meant to be used in cases where we have to deal with libraries (and the
# like) that we cannot influence and where we cannot provide suppressions as
# they are platform-dependent. In that case, we can't test for leaks
# (obviously), but we can still check for access violations, which is useful.
function startup_vg_noleak() {
RS_TESTBENCH_LEAK_CHECK=no
startup_vg $*
}
# same as startup_vgthread, BUT we do NOT wait on the startup message!
function startup_vgthread_waitpid_only() {
startup_common "$1" "$2"
valgrind --tool=helgrind $RS_TEST_VALGRIND_EXTRA_OPTS $RS_TESTBENCH_VALGRIND_EXTRA_OPTS --log-fd=1 --error-exitcode=10 --suppressions=$srcdir/linux_localtime_r.supp ${EXTRA_VALGRIND_SUPPRESSIONS:-} --suppressions=$srcdir/CI/gcov.supp --gen-suppressions=all ../tools/rsyslogd -C -n -i$RSYSLOG_PIDBASE$2.pid -M../runtime/.libs:../.libs -f$CONF_FILE &
wait_rsyslog_startup_pid $2
}
# start rsyslogd with default params under valgrind thread debugger control.
# $1 is the config file name to use, $2 is the instance (blank or 2!)
# returns only after successful startup
function startup_vgthread() {
startup_vgthread_waitpid_only $1 $2
wait_startup $2
}
# inject messages via our inject interface (imdiag)
function injectmsg() {
echo injecting $2 messages
echo injectmsg $1 $2 $3 $4 | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
}
# inject messages in INSTANCE 2 via our inject interface (imdiag)
function injectmsg2() {
echo injecting $2 messages
echo injectmsg $1 $2 $3 $4 | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $?
# TODO: some return state checking? (does it really make sense here?)
}
# show the current main queue size. $1 is the instance.
function get_mainqueuesize() {
if [ "$1" == "2" ]; then
echo getmainmsgqueuesize | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $?
else
echo getmainmsgqueuesize | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
fi
}
# grep for (partial) content. $1 is the content to check for
function content_check() {
grep -qF "$1" < ${RSYSLOG_OUT_LOG}
if [ "$?" -ne "0" ]; then
printf '\n============================================================\n'
echo FAIL: content_check failed to find "'$1'", content is
cat -n ${RSYSLOG_OUT_LOG}
error_exit 1
fi
}
function content_check_with_count() {
# content check variables for Timeout
if [ "x$3" == "x" ]; then
timeoutend=1
else
timeoutend=$3
fi
timecounter=0
while [ $timecounter -lt $timeoutend ]; do
let timecounter=timecounter+1
count=$(grep -F "$1" <${RSYSLOG_OUT_LOG} | wc -l)
if [ $count -eq $2 ]; then
echo content_check_with_count success, \"$1\" occurred $2 times
break
else
if [ "x$timecounter" == "x$timeoutend" ]; then
shutdown_when_empty
wait_shutdown
echo content_check_with_count failed, expected \"$1\" to occur $2 times, but found it $count times
echo file ${RSYSLOG_OUT_LOG} content is:
sort < ${RSYSLOG_OUT_LOG}
error_exit 1
else
echo content_check_with_count have $count, wait for $2 times $1...
$TESTTOOL_DIR/msleep 1000
fi
fi
done
}
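# usage sketch (search string, count and timeout are hypothetical examples):
# wait up to roughly 60 seconds for exactly 1000 matching lines to appear:
#   content_check_with_count "msgnum:" 1000 60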
function custom_content_check() {
grep -qF "$1" < $2
if [ "$?" -ne "0" ]; then
echo FAIL: custom_content_check failed to find "'$1'" inside "'$2'"
echo "file contents:"
cat -n $2
error_exit 1
fi
}
# check that given content $1 is not present in file $2 (default: RSYSLOG_OUT_LOG)
# regular expressions may be used
function check_not_present() {
if [ "$2" == "" ]; then
file=$RSYSLOG_OUT_LOG
else
file="$2"
fi
grep -q "$1" < "$file"
if [ "$?" -eq "0" ]; then
echo FAIL: check_not_present found
echo $1
echo inside file $file of $(wc -l < $file) lines
echo samples:
cat -n "$file" | grep "$1" | head -10
error_exit 1
fi
}
# wait for main message queue to be empty. $1 is the instance.
function wait_queueempty() {
if [ "$1" == "2" ]; then
echo WaitMainQueueEmpty | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $?
else
echo WaitMainQueueEmpty | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
fi
}
# shut rsyslogd down when main queue is empty. $1 is the instance.
function shutdown_when_empty() {
if [ "$1" == "2" ]; then
echo Shutting down instance 2
else
echo Shutting down instance 1
fi
wait_queueempty $1
if [ "$RSYSLOG_PIDBASE" == "" ]; then
echo "RSYSLOG_PIDBASE is EMPTY! - bug in test? (instance $1)"
exit 1
fi
cp $RSYSLOG_PIDBASE$1.pid $RSYSLOG_PIDBASE$1.pid.save
$TESTTOOL_DIR/msleep 500 # wait a bit (think about slow testbench machines!)
kill $(cat $RSYSLOG_PIDBASE$1.pid) # note: we do not wait for the actual termination!
}
# shut rsyslogd down without emptying the queue. $2 is the instance.
function shutdown_immediate() {
cp $RSYSLOG_PIDBASE$2.pid $RSYSLOG_PIDBASE$2.pid.save
kill $(cat $RSYSLOG_PIDBASE$2.pid)
}
# wait for the rsyslogd instance to terminate; we poll until the process
# from the saved pid file is no longer present.
# $1 is the instance
function wait_shutdown() {
i=0
out_pid=$(cat $RSYSLOG_PIDBASE$1.pid.save)
echo wait on shutdown of $out_pid
if [[ "x$out_pid" == "x" ]]
then
terminated=1
else
terminated=0
fi
start_timeout="$(date)"
while [[ $terminated -eq 0 ]]; do
ps -p $out_pid &> /dev/null
if [[ $? != 0 ]]; then
terminated=1
fi
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
let "i++"
if test $i -gt $TB_TIMEOUT_STARTSTOP
then
echo "ABORT! Timeout waiting on shutdown"
echo "Wait initiated $start_timeout, now $(date)"
ps -fp $out_pid
echo "Instance is possibly still running and may need"
echo "manual cleanup."
echo "TRYING TO capture status via gdb from hanging process"
$SUDO gdb ../tools/rsyslogd <<< "attach $out_pid
set pagination off
inf thr
thread apply all bt
quit"
echo "trying to kill -9 process"
kill -9 $out_pid
error_exit 1
fi
done
unset terminated
unset out_pid
if [ -e core.* ]
then
echo "ABORT! core file exists"
error_exit 1
fi
}
# wait for all pending lookup table reloads to complete $1 is the instance.
function await_lookup_table_reload() {
if [ "$1" == "2" ]; then
echo AwaitLookupTableReload | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $?
else
echo AwaitLookupTableReload | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
fi
}
function assert_content_missing() {
grep -qF "$1" < ${RSYSLOG_OUT_LOG}
if [ "$?" -eq "0" ]; then
echo content-missing assertion failed, some line matched pattern "'$1'"
error_exit 1
fi
}
function custom_assert_content_missing() {
grep -qF "$1" < $2
if [ "$?" -eq "0" ]; then
echo content-missing assertion failed, some line in "'$2'" matched pattern "'$1'"
cat -n "$2"
error_exit 1
fi
}
# send a HUP signal to rsyslogd. $1 is the instance.
function issue_HUP() {
kill -HUP $(cat $RSYSLOG_PIDBASE$1.pid)
$TESTTOOL_DIR/msleep 1000
}
# wait for the valgrind-controlled rsyslogd instance to exit. $1 is the instance
function wait_shutdown_vg() {
wait $(cat $RSYSLOG_PIDBASE$1.pid)
export RSYSLOGD_EXIT=$?
echo rsyslogd run exited with $RSYSLOGD_EXIT
if [ -e vgcore.* ]; then
echo "ABORT! core file exists"
error_exit 1
fi
}
# check exit code for valgrind error
function check_exit_vg(){
if [ "$RSYSLOGD_EXIT" -eq "10" ]; then
echo "valgrind run FAILED with exceptions - terminating"
error_exit 1
fi
}
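# usage sketch of the valgrind flow: a test simply swaps the plain
# startup/shutdown helpers for their _vg counterparts, e.g.
#   startup_vg
#   ...inject and process messages as usual...
#   shutdown_when_empty
#   wait_shutdown_vg
#   check_exit_vg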
# this is called if we had an error and need to abort. Here, we
# try to gather as much information as possible. That's most important
# for systems like Travis-CI where we cannot debug on the machine itself.
# our $1 is the to-be-used exit code. if $2 is "stacktrace", call gdb.
function error_exit() {
#env
if [ -e core* ]
then
echo trying to obtain crash location info
echo note: this may not be the correct file, check it
CORE=$(ls core*)
echo "bt" >> gdb.in
echo "q" >> gdb.in
gdb ../tools/rsyslogd $CORE -batch -x gdb.in
CORE=
rm gdb.in
fi
if [[ "$2" == 'stacktrace' || ( ! -e IN_AUTO_DEBUG && "$USE_AUTO_DEBUG" == 'on' ) ]]; then
if [ -e core* ]
then
echo trying to analyze core for main rsyslogd binary
echo note: this may not be the correct file, check it
CORE=$(ls core*)
#echo "set pagination off" >gdb.in
#echo "core $CORE" >>gdb.in
echo "bt" >> gdb.in
echo "echo === THREAD INFO ===" >> gdb.in
echo "info thread" >> gdb.in
echo "echo === thread apply all bt full ===" >> gdb.in
echo "thread apply all bt full" >> gdb.in
echo "q" >> gdb.in
gdb ../tools/rsyslogd $CORE -batch -x gdb.in
CORE=
rm gdb.in
fi
fi
if [[ ! -e IN_AUTO_DEBUG && "$USE_AUTO_DEBUG" == 'on' ]]; then
touch IN_AUTO_DEBUG
# OK, we have the testname and can re-run under valgrind
echo re-running under valgrind control
current_test="./$(basename $0)" # this path is probably wrong -- theinric
$current_test
# wait a little bit so that valgrind can finish
$TESTTOOL_DIR/msleep 4000
# next let's try us to get a debug log
RSYSLOG_DEBUG_SAVE=$RSYSLOG_DEBUG
export RSYSLOG_DEBUG="debug nologfuncflow noprintmutexaction"
$current_test
$TESTTOOL_DIR/msleep 4000
RSYSLOG_DEBUG=$RSYSLOG_DEBUG_SAVE
rm IN_AUTO_DEBUG
fi
# output listening ports as a temporary debug measure (2018-09-08 rgerhards), now disabled, but not yet removed (2018-10-22)
#if [ $(uname) == "Linux" ]; then
# netstat -tlp
#else
# netstat
#fi
# Extended debug output for dependencies started by testbench
if [[ "$EXTRA_EXITCHECK" == 'dumpkafkalogs' ]]; then
# Dump Zookeeper log
dump_zookeeper_serverlog
# Dump Kafka log
dump_kafka_serverlog
fi
# Extended Exit handling for kafka / zookeeper instances
kafka_exit_handling "false"
# Report error to rsyslog testbench stats
if [[ $1 == 1 ]]; then
error_stats
fi
# we need to do some minimal cleanup so that "make distcheck" does not
# complain too much
exit $1
}
# Helper function to call Adiscon test error script
function error_stats() {
if [ "$RSYSLOG_STATSURL" != "" ]; then
echo reporting failure to $RSYSLOG_STATSURL
wget -nv $RSYSLOG_STATSURL\?Testname=$RSYSLOG_TESTNAME\&Testenv=$PWD\&Testmachine=$HOSTNAME\&rndstr=jnxv8i34u78fg23
fi
}
# do the usual sequence check to see if everything was properly received.
# $4... are just to have the ability to pass in more options...
# add -v to chkseq if you need more verbose output
function seq_check() {
$RS_SORTCMD $RS_SORT_NUMERIC_OPT < ${RSYSLOG_OUT_LOG} | ./chkseq -s$1 -e$2 $3 $4 $5 $6 $7
if [ "$?" -ne "0" ]; then
$RS_SORTCMD $RS_SORT_NUMERIC_OPT < ${RSYSLOG_OUT_LOG} > $RSYSLOG_DYNNAME.error.log
echo "sequence error detected in $RSYSLOG_OUT_LOG"
echo "number of lines in file: $(wc -l $RSYSLOG_OUT_LOG)"
echo "sorted data has been placed in error.log, first 10 lines are:"
cat -n $RSYSLOG_DYNNAME.error.log | head -10
echo "---last 10 lines are:"
cat -n $RSYSLOG_DYNNAME.error.log | tail -10
echo "UNSORTED data, first 10 lines are:"
cat -n $RSYSLOG_OUT_LOG | head -10
echo "---last 10 lines are:"
cat -n $RSYSLOG_OUT_LOG | tail -10
# for interactive testing, create a static filename. We know this may get
# mangled during a parallel test run
mv -f $RSYSLOG_DYNNAME.error.log error.log
error_exit 1
fi
}
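# usage sketch (the range is a hypothetical example): verify that messages
# 0..999 were received; add -v for verbose chkseq output as noted above:
#   seq_check 0 999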
# do the usual sequence check to see if everything was properly received. This is
# a duplicate of seq_check, but we could not change its calling conventions without
# breaking a lot of existing test cases, so we preferred to duplicate the code here.
# $4... are just to have the ability to pass in more options...
# add -v to chkseq if you need more verbose output
function seq_check2() {
$RS_SORTCMD $RS_SORT_NUMERIC_OPT < ${RSYSLOG2_OUT_LOG} | ./chkseq -s$1 -e$2 $3 $4 $5 $6 $7
if [ "$?" -ne "0" ]; then
echo "sequence error detected"
error_exit 1
fi
}
# do the usual sequence check, but for gzip files
# $4... are just to have the ability to pass in more options...
function gzip_seq_check() {
ls -l ${RSYSLOG_OUT_LOG}
gunzip < ${RSYSLOG_OUT_LOG} | $RS_SORTCMD $RS_SORT_NUMERIC_OPT | ./chkseq -v -s$1 -e$2 $3 $4 $5 $6 $7
if [ "$?" -ne "0" ]; then
echo "sequence error detected"
error_exit 1
fi
}
# do a tcpflood run and check if it worked. Params are passed to tcpflood.
function tcpflood() {
eval ./tcpflood -p$TCPFLOOD_PORT "$@" $TCPFLOOD_EXTRA_OPTS
if [ "$?" -ne "0" ]; then
echo "error during tcpflood on port ${TCPFLOOD_PORT}! see ${RSYSLOG_OUT_LOG}.save for what was written"
cp ${RSYSLOG_OUT_LOG} ${RSYSLOG_OUT_LOG}.save
error_exit 1 stacktrace
fi
}
# cleanup
# detect any left-over hanging instance
function exit_test() {
nhanging=0
#for pid in $(ps -eo pid,args|grep '/tools/[r]syslogd ' |sed -e 's/\( *\)\([0-9]*\).*/\2/');
#do
#echo "ERROR: left-over instance $pid, killing it"
# ps -fp $pid
# pwd
# printf "we do NOT kill the instance as this does not work with multiple\n"
# printf "builds per machine - this message is now informational to show prob exists!\n"
# #kill -9 $pid
# let "nhanging++"
#done
if test $nhanging -ne 0
then
echo "ABORT! hanging instances left at exit"
#error_exit 1
#exit 77 # for now, just skip - TODO: reconsider when supporting -j
fi
# now real cleanup
rm -f rsyslog.action.*.include
rm -f work rsyslog.out.* xlate*.lkp_tbl
rm -rf test-logdir stat-file1
rm -f rsyslog.conf.tlscert stat-file1 rsyslog.empty imfile-state*
rm -rf rsyslog-link.*.log targets
rm -f ${TESTCONF_NM}.conf
rm -f tmp.qi nocert
rm -fr $RSYSLOG_DYNNAME* # delete all of our dynamic files
unset TCPFLOOD_EXTRA_OPTS
# Extended Exit handling for kafka / zookeeper instances
kafka_exit_handling "true"
printf 'Test SUCCESSFUL\n'
echo -------------------------------------------------------------------------------
}
# finds a free port that we can bind a listener to
# Obviously, any solution is racy, as another process could start
# just after us and grab the same port. However, in practice it seems
# to work pretty well. In any case, we should probably call this as
# late as possible before the usage of the port.
function get_free_port() {
python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1]); s.close()'
}
# check if command $1 is available - will exit 77 when not OK
function check_command_available() {
command -v $1
if [ $? -ne 0 ] ; then
echo Testbench requires unavailable command: $1
exit 77
fi
}
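# usage sketch (the command name is a hypothetical example):
#   check_command_available timeout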
# sort the output file just like we normally do it, but do not call
# seqchk. This is needed for some operations where we need the sort
# result for some preprocessing. Note that a later seqchk will sort
# again, but that's not an issue.
function presort() {
rm -f $RSYSLOG_DYNNAME.presort
$RS_SORTCMD $RS_SORT_NUMERIC_OPT < ${RSYSLOG_OUT_LOG} > $RSYSLOG_DYNNAME.presort
}
#START: ext kafka config
#dep_cache_dir=$(readlink -f .dep_cache)
dep_cache_dir=$(pwd)/.dep_cache
dep_zk_url=http://www-us.apache.org/dist/zookeeper/zookeeper-3.4.13/zookeeper-3.4.13.tar.gz
dep_zk_cached_file=$dep_cache_dir/zookeeper-3.4.13.tar.gz
# byANDRE: We stay with kafka 0.10.x for now. Newer Kafka versions have changes that
# make creating a testbench with single kafka instances difficult.
# old version -> dep_kafka_url=http://www-us.apache.org/dist/kafka/0.10.2.2/kafka_2.12-0.10.2.2.tgz
# old version -> dep_kafka_cached_file=$dep_cache_dir/kafka_2.12-0.10.2.2.tgz
dep_kafka_url=http://www-us.apache.org/dist/kafka/2.0.0/kafka_2.11-2.0.0.tgz
dep_kafka_cached_file=$dep_cache_dir/kafka_2.11-2.0.0.tgz
if [ -z "$ES_DOWNLOAD" ]; then
export ES_DOWNLOAD=elasticsearch-5.6.9.tar.gz
fi
dep_es_cached_file="$dep_cache_dir/$ES_DOWNLOAD"
# kafka (including Zookeeper)
dep_kafka_dir_xform_pattern='s#^[^/]\+#kafka#g'
dep_zk_dir_xform_pattern='s#^[^/]\+#zk#g'
dep_es_dir_xform_pattern='s#^[^/]\+#es#g'
#dep_kafka_log_dump=$(readlink -f rsyslog.out.kafka.log)
# TODO Make dynamic work dir for multiple instances
#dep_work_dir=$(readlink -f .dep_wrk)
dep_work_dir=$(pwd)/.dep_wrk
#dep_kafka_work_dir=$dep_work_dir/kafka
#dep_zk_work_dir=$dep_work_dir/zk
#END: ext kafka config
function kafka_exit_handling() {
# Extended Exit handling for kafka / zookeeper instances
if [[ "$EXTRA_EXIT" == 'kafka' ]]; then
echo "stop kafka instance"
stop_kafka '.dep_wrk' $1
echo "stop zookeeper instance"
stop_zookeeper '.dep_wrk' $1
fi
# Extended Exit handling for kafka / zookeeper instances
if [[ "$EXTRA_EXIT" == 'kafkamulti' ]]; then
echo "stop kafka instances"
stop_kafka '.dep_wrk1' $1
stop_kafka '.dep_wrk2' $1
stop_kafka '.dep_wrk3' $1
echo "stop zookeeper instances"
stop_zookeeper '.dep_wrk1' $1
stop_zookeeper '.dep_wrk2' $1
stop_zookeeper '.dep_wrk3' $1
fi
}
function download_kafka() {
if [ ! -d $dep_cache_dir ]; then
echo "Creating dependency cache dir $dep_cache_dir"
mkdir $dep_cache_dir
fi
if [ ! -f $dep_zk_cached_file ]; then
echo "Downloading zookeeper"
wget -q $dep_zk_url -O $dep_zk_cached_file
if [ $? -ne 0 ]
then
echo error during wget, retry:
wget $dep_zk_url -O $dep_zk_cached_file
if [ $? -ne 0 ]
then
error_exit 1
fi
fi
fi
if [ ! -f $dep_kafka_cached_file ]; then
echo "Downloading kafka"
wget -q $dep_kafka_url -O $dep_kafka_cached_file
if [ $? -ne 0 ]
then
echo error during wget, retry:
wget $dep_kafka_url -O $dep_kafka_cached_file
if [ $? -ne 0 ]
then
error_exit 1
fi
fi
fi
}
function stop_kafka() {
i=0
if [ "x$1" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
dep_work_kafka_config="kafka-server.properties"
else
dep_work_dir=$(readlink -f $srcdir/$1)
if [[ ".dep_wrk" != "$1" ]]; then
dep_work_kafka_config="kafka-server$1.properties"
else
dep_work_kafka_config="kafka-server.properties"
fi
fi
if [ ! -d $dep_work_dir/kafka ]; then
echo "Kafka work-dir $dep_work_dir/kafka does not exist, no action needed"
else
# Get kafka pid instance
kafkapid=$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}')
echo "Stopping Kafka instance $1 ($dep_work_kafka_config/$kafkapid)"
kill $kafkapid
# Check if kafka instance went down!
while true; do
#echo "waiting for propper shutdown for $dep_work_kafka_config / $kafkapid "
kafkapid=$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}')
if [[ "" != "$kafkapid" ]]; then
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
if test $i -gt $TB_TIMEOUT_STARTSTOP; then
echo "Kafka instance $dep_work_kafka_config (PID $kafkapid) still running - Performing hard shutdown (-9)"
kill -9 $kafkapid
break
fi
let "i++"
else
# Break the loop
break
fi
done
if [[ "$2" == 'true' ]]; then
# Process shut down, do cleanup now
cleanup_kafka $1
fi
fi
}
function cleanup_kafka() {
if [ "x$1" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
else
dep_work_dir=$(readlink -f $srcdir/$1)
fi
if [ ! -d $dep_work_dir/kafka ]; then
echo "Kafka work-dir $dep_work_dir/kafka does not exist, no action needed"
else
echo "Cleanup Kafka instance $1"
rm -rf $dep_work_dir/kafka
fi
}
function stop_zookeeper() {
i=0
if [ "x$1" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
dep_work_tk_config="zoo.cfg"
else
dep_work_dir=$(readlink -f $srcdir/$1)
if [[ ".dep_wrk" != "$1" ]]; then
dep_work_tk_config="zoo$1.cfg"
else
dep_work_tk_config="zoo.cfg"
fi
fi
if [ ! -d $dep_work_dir/zk ]; then
echo "Zookeeper work-dir $dep_work_dir/zk does not exist, no action needed"
else
# Get Zookeeper pid instance
zkpid=$(ps aux | grep -i $dep_work_tk_config | grep java | grep -v grep | awk '{print $2}')
echo "Stopping Zookeeper instance $1 ($dep_work_tk_config/$zkpid)"
kill $zkpid
# Check if Zookeeper instance went down!
zkpid=$(ps aux | grep -i $dep_work_tk_config | grep java | grep -v grep | awk '{print $2}')
if [[ "" != "$zkpid" ]]; then
while true; do
#echo "waiting for propper shutdown for $dep_work_tk_config / $zkpid "
zkpid=$(ps aux | grep -i $dep_work_tk_config | grep java | grep -v grep | awk '{print $2}')
if [[ "" != "$zkpid" ]]; then
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
if test $i -gt $TB_TIMEOUT_STARTSTOP; then
echo "Zookeeper instance $dep_work_tk_config (PID $zkpid) still running - Performing hard shutdown (-9)"
kill -9 $zkpid
break
fi
let "i++"
else
# Break the loop
break
fi
done
fi
if [[ "$2" == 'true' ]]; then
# Process shut down, do cleanup now
cleanup_zookeeper $1
fi
fi
}
function cleanup_zookeeper() {
if [ "x$1" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
else
dep_work_dir=$(readlink -f $srcdir/$1)
fi
rm -rf $dep_work_dir/zk
}
function start_zookeeper() {
if [ "x$1" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
dep_work_tk_config="zoo.cfg"
else
dep_work_dir=$(readlink -f $srcdir/$1)
dep_work_tk_config="zoo$1.cfg"
fi
if [ ! -f $dep_zk_cached_file ]; then
echo "Dependency-cache does not have zookeeper package, did you download dependencies?"
error_exit 77
fi
if [ ! -d $dep_work_dir ]; then
echo "Creating dependency working directory"
mkdir -p $dep_work_dir
fi
if [ -d $dep_work_dir/zk ]; then
(cd $dep_work_dir/zk && ./bin/zkServer.sh stop)
$TESTTOOL_DIR/msleep 2000
fi
rm -rf $dep_work_dir/zk
(cd $dep_work_dir && tar -zxvf $dep_zk_cached_file --xform $dep_zk_dir_xform_pattern --show-transformed-names) > /dev/null
cp -f $srcdir/testsuites/$dep_work_tk_config $dep_work_dir/zk/conf/zoo.cfg
echo "Starting Zookeeper instance $1"
(cd $dep_work_dir/zk && ./bin/zkServer.sh start)
$TESTTOOL_DIR/msleep 2000
}
function start_kafka() {
# Force IPv4 usage of Kafka!
export KAFKA_OPTS="-Djava.net.preferIPv4Stack=True"
if [ "x$1" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
dep_work_kafka_config="kafka-server.properties"
else
dep_work_dir=$(readlink -f $1)
dep_work_kafka_config="kafka-server$1.properties"
fi
if [ ! -f $dep_kafka_cached_file ]; then
echo "Dependency-cache does not have kafka package, did you download dependencies?"
error_exit 77
fi
if [ ! -d $dep_work_dir ]; then
echo "Creating dependency working directory"
mkdir -p $dep_work_dir
fi
rm -rf $dep_work_dir/kafka
( cd $dep_work_dir &&
tar -zxvf $dep_kafka_cached_file --xform $dep_kafka_dir_xform_pattern --show-transformed-names) > /dev/null
cp -f $srcdir/testsuites/$dep_work_kafka_config $dep_work_dir/kafka/config/
echo "Starting Kafka instance $dep_work_kafka_config"
(cd $dep_work_dir/kafka && ./bin/kafka-server-start.sh -daemon ./config/$dep_work_kafka_config)
$TESTTOOL_DIR/msleep 4000
# Check if kafka instance came up!
kafkapid=$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}')
if [[ "" != "$kafkapid" ]];
then
echo "Kafka instance $dep_work_kafka_config (PID $kafkapid) started ... "
else
echo "Starting Kafka instance $dep_work_kafka_config, SECOND ATTEMPT!"
(cd $dep_work_dir/kafka && ./bin/kafka-server-start.sh -daemon ./config/$dep_work_kafka_config)
$TESTTOOL_DIR/msleep 4000
kafkapid=$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}')
if [[ "" != "$kafkapid" ]];
then
echo "Kafka instance $dep_work_kafka_config (PID $kafkapid) started ... "
else
echo "Failed to start Kafka instance for $dep_work_kafka_config"
error_exit 77
fi
fi
}
function create_kafka_topic() {
if [ "x$2" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
else
dep_work_dir=$(readlink -f $2)
fi
if [ "x$3" == "x" ]; then
dep_work_port='2181'
else
dep_work_port=$3
fi
if [ ! -d $dep_work_dir/kafka ]; then
echo "Kafka work-dir $dep_work_dir/kafka does not exist, did you start kafka?"
exit 1
fi
if [ "x$1" == "x" ]; then
echo "Topic-name not provided."
exit 1
fi
# we need to make sure replication is working. So let's loop until no more
# errors (or timeout) - see also: https://github.com/rsyslog/rsyslog/issues/3045
timeout_ready=100 # roughly 10 sec
is_retry=0
i=0
while true; do
text=$(cd $dep_work_dir/kafka && ./bin/kafka-topics.sh --zookeeper localhost:$dep_work_port/kafka --create --topic $1 --replication-factor 1 --partitions 2 )
grep "Error.* larger than available brokers: 0" <<<"$text"
res=$?
echo RESULT GREP: $res
if [ $res -ne 0 ]; then
echo looks like brokers are available - continue...
break
fi
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
let "i++"
if test $i -gt $timeout_ready; then
echo "ENV ERROR: kafka brokers did not come up:"
cat -n <<< $text
if [ $is_retry == 1 ]; then
echo "SKIPing test as the env is not ready for it"
exit 77
fi
echo "RETRYING kafka startup, doing shutdown and startup"
stop_zookeeper; stop_kafka
start_zookeeper; start_kafka
echo "READY for RETRY"
is_retry=1
i=0
fi
done
# we *assume* now all goes well
(cd $dep_work_dir/kafka && ./bin/kafka-topics.sh --zookeeper localhost:$dep_work_port/kafka --alter --topic $1 --delete-config retention.ms)
(cd $dep_work_dir/kafka && ./bin/kafka-topics.sh --zookeeper localhost:$dep_work_port/kafka --alter --topic $1 --delete-config retention.bytes)
}
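# usage sketch of the kafka helpers in a test; topic name and zookeeper port
# are hypothetical examples, the default '.dep_wrk' work dir is used:
#   download_kafka
#   start_zookeeper
#   start_kafka
#   create_kafka_topic 'rstb_topic' '.dep_wrk' '2181'
#   ...run the actual test...
#   delete_kafka_topic 'rstb_topic' '.dep_wrk' '2181'
#   stop_kafka; stop_zookeeper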
function delete_kafka_topic() {
if [ "x$2" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
else
dep_work_dir=$(readlink -f $srcdir/$2)
fi
if [ "x$3" == "x" ]; then
dep_work_port='2181'
else
dep_work_port=$3
fi
echo "deleting kafka-topic $1"
(cd $dep_work_dir/kafka && ./bin/kafka-topics.sh --delete --zookeeper localhost:$dep_work_port/kafka --topic $1)
}
function dump_kafka_topic() {
if [ "x$2" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
dep_kafka_log_dump=$(readlink -f rsyslog.out.kafka.log)
else
dep_work_dir=$(readlink -f $srcdir/$2)
dep_kafka_log_dump=$(readlink -f rsyslog.out.kafka$2.log)
fi
if [ "x$3" == "x" ]; then
dep_work_port='2181'
else
dep_work_port=$3
fi
echo "dumping kafka-topic $1"
if [ ! -d $dep_work_dir/kafka ]; then
echo "Kafka work-dir does not exist, did you start kafka?"
exit 1
fi
if [ "x$1" == "x" ]; then
echo "Topic-name not provided."
exit 1
fi
(cd $dep_work_dir/kafka && ./bin/kafka-console-consumer.sh --timeout-ms 2000 --from-beginning --zookeeper localhost:$dep_work_port/kafka --topic $1 > $dep_kafka_log_dump)
}
function dump_kafka_serverlog() {
if [ "x$1" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
else
dep_work_dir=$(readlink -f $srcdir/$1)
fi
if [ ! -d $dep_work_dir/kafka ]; then
echo "Kafka work-dir $dep_work_dir/kafka does not exist, no kafka debuglog"
else
echo "Dumping server.log from Kafka instance $1"
echo "========================================="
cat $dep_work_dir/kafka/logs/server.log
echo "========================================="
fi
}
function dump_zookeeper_serverlog() {
if [ "x$1" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
else
dep_work_dir=$(readlink -f $srcdir/$1)
fi
echo "Dumping zookeeper.out from Zookeeper instance $1"
echo "========================================="
cat $dep_work_dir/zk/zookeeper.out
echo "========================================="
}
case $1 in
'init') $srcdir/killrsyslog.sh # kill rsyslogd if it runs for some reason
# for (solaris) load debugging, uncomment next 2 lines:
#export LD_DEBUG=all
#ldd ../tools/rsyslogd
# environment debug
#find / -name "librelp.so*"
#ps -ef |grep syslog
#netstat -a | grep LISTEN
# cleanup of hanging instances from previous runs
# practice has shown this is pretty useful!
#for pid in $(ps -eo pid,args|grep '/tools/[r]syslogd ' |sed -e 's/\( *\)\([0-9]*\).*/\2/');
#do
#echo "ERROR: left-over previous instance $pid, killing it"
#ps -fp $pid
#pwd
#kill -9 $pid
#done
# cleanup hanging uxsockrcvr processes
for pid in $(ps -eo pid,args|grep 'uxsockrcvr' |grep -v grep |sed -e 's/\( *\)\([0-9]*\).*/\2/');
do
echo "ERROR: left-over previous uxsockrcvr instance $pid, killing it"
ps -fp $pid
pwd
kill -9 $pid
done
# end cleanup
# some default names (later to be set in other parts, once we support fully
# parallel tests)
export RSYSLOG_DFLT_LOG_INTERNAL=1 # testbench needs internal messages logged internally!
if [ "$SYSLOG_DYNNAME" != "" ]; then
echo "FAIL: \$RSYSLOG_DYNNAME already set in init"
echo "hint: was init accidently called twice?"
exit 2
fi
export RSYSLOG_DYNNAME="rstb_$(./test_id $(basename $0))"
export RSYSLOG2_OUT_LOG=rsyslog2.out.log # TODO: remove
export RSYSLOG_OUT_LOG=rsyslog.out.log # TODO: remove
export RSYSLOG_PIDBASE="rsyslog" # TODO: remove
export IMDIAG_PORT=13500
export IMDIAG_PORT2=13501
export TCPFLOOD_PORT=13514
# Extra Variables for Test statistic reporting
export RSYSLOG_TESTNAME=$(basename $0)
# we create one file with the test name, so that we know what was
# left over if "make distcheck" complains
touch $RSYSLOG_DYNNAME-$(basename $0).test_id
if [ -z $RS_SORTCMD ]; then
RS_SORTCMD=sort
fi
if [ -z $RS_SORT_NUMERIC_OPT ]; then
if [ "$(uname)" == "AIX" ]; then
RS_SORT_NUMERIC_OPT=-n
else
RS_SORT_NUMERIC_OPT=-g
fi
fi
if [ -z $RS_CMPCMD ]; then
RS_CMPCMD=cmp
fi
if [ -z $RS_HEADCMD ]; then
RS_HEADCMD=head
fi
ulimit -c unlimited &> /dev/null # at least try to get core dumps
echo "------------------------------------------------------------"
echo "Test: $0"
echo "------------------------------------------------------------"
# we assume TZ is set, else most test will fail. So let's ensure
# this really is the case
if [ -z $TZ ]; then
echo "testbench: TZ env var not set, setting it to UTC"
export TZ=UTC
fi
rm -f xlate*.lkp_tbl
rm -f log log* # RSyslog debug output
rm -f work
rm -rf test-logdir stat-file1
rm -f rsyslog.empty imfile-state* omkafka-failed.data
rm -rf rsyslog-link.*.log targets
rm -f tmp.qi nocert
rm -f core.* vgcore.* core*
# Note: rsyslog.action.*.include must NOT be deleted, as it
# is used to setup some parameters BEFORE calling init. This
# happens in chained test scripts. Delete on exit is fine,
# though.
# note: TCPFLOOD_EXTRA_OPTS MUST NOT be unset in init, because
# some tests need to set it BEFORE calling init to accommodate
# their generic test drivers.
if [ "$TCPFLOOD_EXTRA_OPTS" != '' ] ; then
echo TCPFLOOD_EXTRA_OPTS set: $TCPFLOOD_EXTRA_OPTS
fi
if [ "$USE_AUTO_DEBUG" != 'on' ] ; then
rm -f IN_AUTO_DEBUG
fi
if [ -e IN_AUTO_DEBUG ]; then
export valgrind="valgrind --malloc-fill=ff --free-fill=fe --suppressions=$srcdir/known_issues.supp ${EXTRA_VALGRIND_SUPPRESSIONS:-} --log-fd=1"
fi
;;
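# usage sketch: besides the bash functions above, test scripts can still
# drive this file via the old-style command interface, e.g.
#   . $srcdir/diag.sh init
#   . $srcdir/diag.sh check-ipv6-available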
'check-ipv6-available') # check if IPv6 - will exit 77 when not OK
ifconfig -a |grep ::1
if [ $? -ne 0 ] ; then
echo this test requires an active IPv6 stack, which we do not have here
exit 77
fi
;;
'es-init') # initialize local Elasticsearch *testbench* instance for the next
# test. NOTE: do NOT put anything useful on that instance!
curl --silent -XDELETE localhost:${ES_PORT:-9200}/rsyslog_testbench
;;
'es-getdata') # read data from ES to a local file so that we can process
if [ "x$3" == "x" ]; then
es_get_port=9200
else
es_get_port=$3
fi
# it with our regular tooling.
# Note: param 2 MUST be number of records to read (ES does
# not return the full set unless you tell it explicitly).
curl --silent localhost:$es_get_port/rsyslog_testbench/_search?size=$2 > work
python $srcdir/es_response_get_msgnum.py > ${RSYSLOG_OUT_LOG}
;;
'getpid')
pid=$(cat $RSYSLOG_PIDBASE$2.pid)
;;
'wait-pid-termination') # wait for the pid in pid $2 to terminate, abort on timeout
i=0
out_pid=$2
if [[ "x$out_pid" == "x" ]]; then
terminated=1
else
terminated=0
fi
start_timeout="$(date)"
while [[ $terminated -eq 0 ]]; do
ps -p $out_pid &> /dev/null
if [[ $? != 0 ]]
then
terminated=1
fi
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
let "i++"
if test $i -gt $TB_TIMEOUT_STARTSTOP
then
echo "ABORT! Timeout waiting on shutdown"
echo "Wait initiated $start_timeout, now $(date)"
ps -fp $out_pid
echo "Instance is possibly still running and may need"
echo "manual cleanup."
error_exit 1
fi
done
unset terminated
unset out_pid
;;
'kill-immediate') # kill rsyslog unconditionally
kill -9 $(cat $RSYSLOG_PIDBASE.pid)
# note: we do not wait for the actual termination!
;;
'injectmsg-litteral') # inject literal payload via our inject interface (imdiag)
echo injecting msg payload from: $2
sed -e 's/^/injectmsg litteral /g' < "$2" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
# TODO: some return state checking? (does it really make sense here?)
;;
'check-mainq-spool') # check if mainqueue spool files exist, if not abort (we just check .qi).
echo There must exist some files now:
ls -l $RSYSLOG_DYNNAME.spool
echo .qi file:
cat $RSYSLOG_DYNNAME.spool/mainq.qi
if test ! -f $RSYSLOG_DYNNAME.spool/mainq.qi; then
echo "error: mainq.qi does not exist where expected to do so!"
error_exit 1
fi
;;
'assert-equal')
if [ "x$4" == "x" ]; then
tolerance=0
else
tolerance=$4
fi
result=$(echo $2 $3 $tolerance | awk 'function abs(v) { return v > 0 ? v : -v } { print (abs($1 - $2) <= $3) ? "PASSED" : "FAILED" }')
if [ $result != 'PASSED' ]; then
echo "Value '$2' != '$3' (within tolerance of $tolerance)"
error_exit 1
fi
;;
'ensure-no-process-exists')
ps -ef | grep -v grep | grep -qF "$2"
if [ "x$?" == "x0" ]; then
echo "assertion failed: process with name-fragment matching '$2' found"
error_exit 1
fi
;;
'grep-check') # grep for "$EXPECTED" in ${RSYSLOG_OUT_LOG} - env var must be set
# before this method is called
grep "$EXPECTED" ${RSYSLOG_OUT_LOG} > /dev/null
if [ $? -eq 1 ]; then
echo "GREP FAIL: ${RSYSLOG_OUT_LOG} content:"
cat ${RSYSLOG_OUT_LOG}
echo "GREP FAIL: expected text not found:"
echo "$EXPECTED"
error_exit 1
fi;
;;
'wait-file-lines')
# $2 filename, $3 expected nbr of lines, $4 nbr of tries
if [ "$4" == "" ]; then
timeoutend=1
else
timeoutend=$4
fi
timecounter=0
while [ $timecounter -lt $timeoutend ]; do
(( timecounter++ ))
count=$(wc -l < ${RSYSLOG_OUT_LOG})
if [ $count -eq $3 ]; then
echo wait-file-lines success, have $3 lines
break
else
if [ "x$timecounter" == "x$timeoutend" ]; then
echo wait-file-lines failed, expected $3 got $count
shutdown_when_empty
wait_shutdown
error_exit 1
else
echo wait-file-lines not yet there, currently $count lines
$TESTTOOL_DIR/msleep 200
fi
fi
done
unset count
;;
'block-stats-flush')
echo blocking stats flush
echo "blockStatsReporting" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
;;
'await-stats-flush-after-block')
echo unblocking stats flush and waiting for it
echo "awaitStatsReport" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
;;
'allow-single-stats-flush-after-block-and-wait-for-it')
echo blocking stats flush
echo "awaitStatsReport block_again" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
;;
'wait-for-stats-flush')
echo "will wait for stats push"
while [[ ! -f $2 ]]; do
echo waiting for stats file "'$2'" to be created
$TESTTOOL_DIR/msleep 100
done
prev_count=$(grep 'BEGIN$' <$2 | wc -l)
new_count=$prev_count
while [[ "x$prev_count" == "x$new_count" ]]; do
new_count=$(grep 'BEGIN$' <$2 | wc -l) # busy spin, because it allows as close timing-coordination in actual test run as possible
done
echo "stats push registered"
;;
'wait-for-dyn-stats-reset')
echo "will wait for dyn-stats-reset"
while [[ ! -f $2 ]]; do
echo waiting for stats file "'$2'" to be created
$TESTTOOL_DIR/msleep 100
done
prev_purged=$(grep -F 'origin=dynstats' < $2 | grep -F "${3}.purge_triggered=" | sed -e 's/.\+.purge_triggered=//g' | awk '{s+=$1} END {print s}')
new_purged=$prev_purged
while [[ "x$prev_purged" == "x$new_purged" ]]; do
new_purged=$(grep -F 'origin=dynstats' < "$2" | grep -F "${3}.purge_triggered=" | sed -e 's/.\+\.purge_triggered=//g' | awk '{s+=$1} END {print s}') # busy spin, because it allows as close timing-coordination in actual test run as possible
$TESTTOOL_DIR/msleep 10
done
echo "dyn-stats reset for bucket ${3} registered"
;;
# not needed at the moment, but let's keep it in a little, 2018-09-03 rgerhards
# 'content-check-regex')
# # this does a content check which permits regex
# grep "$2" $3 -q
# if [ "$?" -ne "0" ]; then
# echo "----------------------------------------------------------------------"
# echo FAIL: content-check-regex failed to find "'$2'" inside "'$3'"
# echo "file contents:"
# cat $3
# error_exit 1
# fi
# ;;
'first-column-sum-check')
sum=$(grep $3 < $4 | sed -e $2 | awk '{s+=$1} END {print s}')
if [ "x${sum}" != "x$5" ]; then
printf '\n============================================================\n'
echo FAIL: sum of first column with edit-expr "'$2'" run over lines from file "'$4'" matched by "'$3'" equals "'$sum'" which is NOT equal to EXPECTED value of "'$5'"
echo "file contents:"
cat $4
error_exit 1
fi
;;
'assert-first-column-sum-greater-than')
sum=$(grep $3 <$4| sed -e $2 | awk '{s+=$1} END {print s}')
if [ ! $sum -gt $5 ]; then
echo sum of first column with edit-expr "'$2'" run over lines from file "'$4'" matched by "'$3'" equals "'$sum'" which is smaller than expected lower-limit of "'$5'"
echo "file contents:"
cat $4
error_exit 1
fi
;;
'content-pattern-check')
grep -q "$2" < ${RSYSLOG_OUT_LOG}
if [ "$?" -ne "0" ]; then
echo content-pattern-check failed, no line matched pattern "'$2'"
echo "file contents:"
cat -n ${RSYSLOG_OUT_LOG}
error_exit 1
fi
;;
'require-journalctl') # check if journalctl exists on the system
if ! hash journalctl 2>/dev/null ; then
echo "journalctl command missing, skipping test"
exit 77
fi
;;
'download-elasticsearch')
if [ ! -d $dep_cache_dir ]; then
echo "Creating dependency cache dir $dep_cache_dir"
mkdir $dep_cache_dir
fi
if [ ! -f $dep_es_cached_file ]; then
if [ -f /local_dep_cache/$ES_DOWNLOAD ]; then
printf 'ElasticSearch: satisfying dependency %s from system cache.\n' "$ES_DOWNLOAD"
cp /local_dep_cache/$ES_DOWNLOAD $dep_es_cached_file
else
dep_es_url="https://artifacts.elastic.co/downloads/elasticsearch/$ES_DOWNLOAD"
printf 'ElasticSearch: satisfying dependency %s from %s\n' "$ES_DOWNLOAD" "$dep_es_url"
wget -q $dep_es_url -O $dep_es_cached_file
fi
fi
;;
'prepare-elasticsearch') # $2, if set, is the number of additional ES instances
# Heap Size (limit to 128MB for testbench! default is way too HIGH)
export ES_JAVA_OPTS="-Xms128m -Xmx128m"
if [ "x$2" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
dep_work_es_config="es.yml"
dep_work_es_pidfile="es.pid"
else
dep_work_dir=$(readlink -f $srcdir/$2)
dep_work_es_config="es$2.yml"
dep_work_es_pidfile="es$2.pid"
fi
if [ ! -f $dep_es_cached_file ]; then
echo "Dependency-cache does not have elasticsearch package, did "
echo "you download dependencies?"
error_exit 1
fi
if [ ! -d $dep_work_dir ]; then
echo "Creating dependency working directory"
mkdir -p $dep_work_dir
fi
if [ -d $dep_work_dir/es ]; then
if [ -e $dep_work_es_pidfile ]; then
es_pid=$(cat $dep_work_es_pidfile)
kill -SIGTERM $es_pid
. $srcdir/diag.sh wait-pid-termination $es_pid
fi
fi
rm -rf $dep_work_dir/es
echo TEST USES ELASTICSEARCH BINARY $dep_es_cached_file
(cd $dep_work_dir && tar -zxf $dep_es_cached_file --xform $dep_es_dir_xform_pattern --show-transformed-names) > /dev/null
if [ -n "${ES_PORT:-}" ] ; then
rm -f $dep_work_dir/es/config/elasticsearch.yml
sed "s/^http.port:.*\$/http.port: ${ES_PORT}/" $srcdir/testsuites/$dep_work_es_config > $dep_work_dir/es/config/elasticsearch.yml
else
cp -f $srcdir/testsuites/$dep_work_es_config $dep_work_dir/es/config/elasticsearch.yml
fi
if [ ! -d $dep_work_dir/es/data ]; then
echo "Creating elastic search directories"
mkdir -p $dep_work_dir/es/data
mkdir -p $dep_work_dir/es/logs
mkdir -p $dep_work_dir/es/tmp
fi
echo ElasticSearch prepared for use in test.
;;
'start-elasticsearch') # $2, if set, is the number of additional ES instances
# Heap Size (limit to 128MB for testbench! default is way too HIGH)
export ES_JAVA_OPTS="-Xms128m -Xmx128m"
pwd
if [ "x$2" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
dep_work_es_config="es.yml"
dep_work_es_pidfile="$(pwd)/es.pid"
else
dep_work_dir=$(readlink -f $srcdir/$2)
dep_work_es_config="es$2.yml"
dep_work_es_pidfile="es$2.pid"
fi
echo "Starting ElasticSearch instance $2"
# THIS IS THE ACTUAL START of ES
$dep_work_dir/es/bin/elasticsearch -p $dep_work_es_pidfile -d
$TESTTOOL_DIR/msleep 2000
echo "Starting instance $2 started with PID" $(cat $dep_work_es_pidfile)
# Wait for startup with hardcoded timeout
timeoutend=60
timeseconds=0
# Loop until elasticsearch port is reachable or until
# timeout is reached!
until [ "$(curl --silent --show-error --connect-timeout 1 http://localhost:${ES_PORT:-19200} | grep 'rsyslog-testbench')" != "" ]; do
echo "--- waiting for ES startup: $timeseconds seconds"
$TESTTOOL_DIR/msleep 1000
let "timeseconds=$timeseconds + 1"
if [ "$timeseconds" -gt "$timeoutend" ]; then
echo "--- TIMEOUT ( $timeseconds ) reached!!!"
error_exit 1
fi
done
$TESTTOOL_DIR/msleep 2000
echo ES startup succeeded
;;
'stop-elasticsearch')
if [ "x$2" == "x" ]; then
dep_work_dir=$(readlink -f .dep_wrk)
dep_work_es_pidfile="es.pid"
else
dep_work_dir=$(readlink -f $srcdir/$2)
dep_work_es_pidfile="es$2.pid"
fi
if [ -e $dep_work_es_pidfile ]; then
es_pid=$(cat $dep_work_es_pidfile)
printf 'stopping ES with pid %d\n' $es_pid
kill -SIGTERM $es_pid
. $srcdir/diag.sh wait-pid-termination $es_pid
fi
;;
'cleanup-elasticsearch')
dep_work_dir=$(readlink -f .dep_wrk)
dep_work_es_pidfile="es.pid"
. $srcdir/diag.sh stop-elasticsearch
rm -f $dep_work_es_pidfile
rm -rf $dep_work_dir/es
;;
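# usage sketch of the elasticsearch helpers from a test script; the ES_PORT
# value and the ordering shown are one plausible example, not a fixed recipe:
#   export ES_PORT=19200
#   . $srcdir/diag.sh download-elasticsearch
#   . $srcdir/diag.sh prepare-elasticsearch
#   . $srcdir/diag.sh start-elasticsearch
#   ...run the test...
#   . $srcdir/diag.sh stop-elasticsearch
#   . $srcdir/diag.sh cleanup-elasticsearch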
'check-inotify') # Check for inotify/fen support
if [ -n "$(find /usr/include -name 'inotify.h' -print -quit)" ]; then
echo [inotify mode]
elif [ -n "$(find /usr/include/sys/ -name 'port.h' -print -quit)" ]; then
grep -qF "PORT_SOURCE_FILE" < /usr/include/sys/port.h
if [ "$?" -ne "0" ]; then
echo [port.h found but FEN API not implemented, skipping...]
exit 77 # FEN API not available, skip this test
fi
echo [fen mode]
else
echo [inotify/fen not supported, skipping...]
exit 77 # no inotify available, skip this test
fi
;;
'check-inotify-only') # Check for ONLY inotify support
if [ -n "$(find /usr/include -name 'inotify.h' -print -quit)" ]; then
echo [inotify mode]
else
echo [inotify not supported, skipping...]
exit 77 # no inotify available, skip this test
fi
;;
*) echo "TESTBENCH error: invalid argument" $1
exit 100
esac