mirror of
https://github.com/rsyslog/rsyslog.git
synced 2025-12-15 10:30:40 +01:00
This patch implements a simple round-robin load balancer for omfwd. It provides equal distribution of load to a pool of target servers. The code currently has no different modes and no special tuning for the load balancer. However, it works very well in the most common use cases. Furthermore, it provides a solid base on which more elaborate functionality can be built if there is a need. The new functionality is fully backwards compatible with previous configuration settings. New action() config params: * pool.resumeinterval New/"changed" rstats counters Each target receives its own set of pstats counters. Most importantly this is the case for byte counts. That counter retains the same naming, but there may now be multiple of these counters, one for each target ip, port tuple. New pstats message count to target Among others, this can be used for checking that the load balancer works as intended. The byte count emitted so far does not provide a clear indication of how many messages the targets had actually processed. For obvious reasons, this message count makes most sense in advanced load balancing scenarios, but also provides additional insight into round-robin. Non-matches indicate that targets went offline, and we can now evaluate the impact this had on processing. - re-design rebind functionality This now works at the transaction level. It causes a rebind of all pool members. The previous code has not worked 100% correctly for a couple of years now (after output batching integration). As cleanup, rebindInterval support has been removed from tcpClt, because omfwd is the only user. This permits a cleaner code path. We also noticed a bug with rebindInterval: it caused some mild message duplication for quite some time. This went unnoticed. To address that efficiently, rebindInterval in the future will be considered once per batch. That means up to (maxBatchSize - 1) more messages than the configured rebindInterval may be transmitted before a rebind happens. 
That's the cleanest mode of operation and should not make any difference for real deployments. Some additional work done in this commit: netstream: harden component against upper-layer logic errors network subsystem: better handle API errors and provide more info omfwd: add new parameter "iobuffer.maxsize" add new global parameter debug.abortoninternalerror and use it This parameter makes it possible to let test runs fail when an internal error is detected and gracefully handled by rsyslog. While it is great to have it gracefully handled in practice, we should not accept this during testing. The new parameter permits aborting in this case and emits the related error message beforehand. It is turned on by default in our regular tests. add dedicated error code for "hard" program errors omfwd: some cleanup + error message fix + new debug level messages imptcp: improve error messages add omfwd option to NOT do extended connection check also output wrkr id in some omfwd messages (primarily debugging aid) better debug info via LogMsg() interface improve messages regarding imptcp and omfwd suspension / thread IDs refactor and enhance minitcpsrvr for mimicking dead servers new global (debugging) option, correction of an informational msg add global option allmessagestostderr add new tests
170 lines
4.9 KiB
Bash
Executable File
170 lines
4.9 KiB
Bash
Executable File
#!/bin/bash
# Copyright (C) 2011 by Rainer Gerhards
# This file is part of the rsyslog project, released under ASL 2.0
#
# Purpose: a sender instance (2) forwards messages via TCP to a receiver
# instance (1). The receiver is stopped and restarted twice while the sender
# keeps getting new messages, which then have to go into the sender's queue.
# The test checks that
#   - the sender's queue file no longer grows once the queue was drained,
#   - the DA queue can be reactivated after its initial shutdown, and
#   - all injected messages finally arrive at the receiver.
# All helper commands used below are provided by the sourced diag.sh.
. "${srcdir:=.}/diag.sh" init
skip_platform "FreeBSD" "This test does not work on FreeBSD - problems with os utility option switches"
#
# STEP1: start both instances and send 1000 messages.
# Note: receiver is instance 1, sender instance 2.
#
# start up the instances. Note that the environment settings can be changed to
# set instance-specific debugging parameters!
#export RSYSLOG_DEBUG="debug nostdout"
#export RSYSLOG_DEBUGLOG="log2"
echo starting receiver
generate_conf
add_conf '
# then SENDER sends to this port (not tcpflood!)
module(load="../plugins/imtcp/.libs/imtcp")
input(type="imtcp" port="0" listenPortFileName="'$RSYSLOG_DYNNAME'.tcpflood_port" )

$template outfmt,"%msg:F,58:2%\n"
:msg, contains, "msgnum:" ./'$RSYSLOG_OUT_LOG';outfmt
'
startup
# remember the receiver's listen port: the sender forwards to it and the
# receiver is bound to the very same port again when it is restarted in step 3
export PORT_RCVR="$TCPFLOOD_PORT"
#export RSYSLOG_DEBUG="debug nostdout"
#export RSYSLOG_DEBUGLOG="log"
#valgrind="valgrind"
echo starting sender
generate_conf 2
# NOTE(review): presumably this keeps the harness from reusing the receiver's
# port for instance 2 - confirm against diag.sh
export TCPFLOOD_PORT="$(get_free_port)"
add_conf '
$WorkDirectory '$RSYSLOG_DYNNAME'.spool
$MainMsgQueueSize 2000
$MainMsgQueueLowWaterMark 800
$MainMsgQueueHighWaterMark 1000
$MainMsgQueueDequeueBatchSize 1
$MainMsgQueueMaxFileSize 1g
$MainMsgQueueWorkerThreads 1
$MainMsgQueueFileName mainq

# we use the shortest resume interval a) to let the test not run too long
# and b) make sure some retries happen before the reconnect
$ActionResumeInterval 1
$ActionSendResendLastMsgOnReconnect on
$ActionResumeRetryCount -1
*.* @@127.0.0.1:'$PORT_RCVR'
' 2
startup 2
# re-set params so that new instances do not thrash it...
#unset RSYSLOG_DEBUG
#unset RSYSLOG_DEBUGLOG

# now inject the messages into instance 2. It will connect to instance 1,
# and that instance will record the data.
injectmsg2 1 1000
wait_queueempty
./msleep 1000 # let things settle down a bit

#
# Step 2: shutdown receiver, then send some more data, which then
# needs to go into the queue.
#
echo step 2

shutdown_when_empty
wait_shutdown

injectmsg2 1001 10000
./msleep 3000 # make sure some retries happen (resume interval is set to 1 second)
get_mainqueuesize 2
ls -l "${RSYSLOG_DYNNAME}.spool"

#
# Step 3: restart receiver, wait that the sender drains its queue
#
echo step 3
#export RSYSLOG_DEBUGLOG="log2"
generate_conf
add_conf '
# then SENDER sends to this port (not tcpflood!)
module(load="../plugins/imtcp/.libs/imtcp")
input(type="imtcp" port="'$PORT_RCVR'")

$template outfmt,"%msg:F,58:2%\n"
:msg, contains, "msgnum:" ./'$RSYSLOG_OUT_LOG';outfmt
'
startup
echo "waiting for sender to drain queue [may need a short while]"
wait_queueempty 2
ls -l "${RSYSLOG_DYNNAME}.spool"
# reference size of the queue file after the queue has been drained
OLDFILESIZE=$(stat -c%s "${RSYSLOG_DYNNAME}.spool/mainq.00000001")
echo "file size to expect is $OLDFILESIZE"


#
# Step 4: send new data. Queue files are not permitted to grow now
# (but one file continues to exist).
#
echo step 4
injectmsg2 11001 10
wait_queueempty 2

# at this point, the queue file shall not have grown. Note
# that we MUST NOT shut down the instance right now, because it
# would clean up the queue files! So we need to do our checks
# first (here!).
ls -l "${RSYSLOG_DYNNAME}.spool"
NEWFILESIZE=$(stat -c%s "${RSYSLOG_DYNNAME}.spool/mainq.00000001")
# The verdict is computed once and kept in a flag. A size that could not be
# determined (stat failed, value is empty) cannot prove that the file did
# not grow, so it counts as a mismatch as well.
queue_file_grew=0
if [ -z "$OLDFILESIZE" ] || [ -z "$NEWFILESIZE" ] \
    || [ "$NEWFILESIZE" != "$OLDFILESIZE" ]
then
  queue_file_grew=1
  echo "file sizes do not match, expected $OLDFILESIZE, actual $NEWFILESIZE"
  echo this means that data has been written to the queue file where it
  echo no longer should be written.
  # abort will happen below, because we must ensure proper system shutdown
  # HOWEVER, during actual testing it may be useful to do an exit here (so
  # that e.g. the debug log is pointed right at the correct spot).
  # exit 1
fi

#
# We now do an extra test (so this is two in one ;)) to see if the DA
# queue can be reactivated after its initial shutdown. In essence, we
# redo steps 2 and 3.
#
# Step 5: stop receiver again, then send some more data, which then
# needs to go into the queue.
#
echo step 5
echo "*** done primary test *** now checking if DA can be restarted"
shutdown_when_empty
wait_shutdown

injectmsg2 11011 10000
sleep 1 # we need to wait, otherwise we may be so fast that the receiver
        # comes up before we have finally suspended the action
get_mainqueuesize 2
ls -l "${RSYSLOG_DYNNAME}.spool"

#
# Step 6: restart receiver, wait that the sender drains its queue
#
echo step 6
# the receiver configuration generated in step 3 is still in place
startup
echo "waiting for sender to drain queue [may need a short while]"
wait_queueempty 2
ls -l "${RSYSLOG_DYNNAME}.spool"

#
# Queue file size checks done. Now it is time to terminate the system
# and see if everything could be received (the usual check, done here
# for completeness, more or less as a bonus).
#
shutdown_when_empty 2
wait_shutdown 2

# now it is time to stop the receiver as well
shutdown_when_empty
wait_shutdown

# now abort test if we need to (due to filesize predicate)
if [ "$queue_file_grew" -ne 0 ]; then
  error_exit 1
fi
# do the final check
export SEQ_CHECK_OPTIONS=-d
seq_check 1 21010 -m 100
exit_test