Fixes a "bug in test code" situation, which had to do with omprog child death racing with the next input.

Reasoning:
- kill has delivered signal
- child is waiting on input
- child gets input, appends it to the file, and exits the while-loop
- another signal comes
- another log-line is injected
- the process is not dead yet but is out of the while loop
- it won't read the new input, so that input will never be flushed to the file
- it dies, new process is started, it gets next message, but the previous message is now lost

It can lead to multiple messages being lost too.

The fix is to reduce the number of messages we pass through this read-log, write-log and kill-process cycle so that we can afford some sleep, and then to actually add some sleep between the steps.
This commit is contained in:
Janmejay Singh 2016-10-12 18:55:37 +05:30
parent 1bd2d6d480
commit 4886df7232
4 changed files with 19 additions and 11 deletions

View File

@ -14,13 +14,15 @@ sleep 1
old_fd_count=$(lsof -p $pid | wc -l)
for i in $(seq 5 100); do
for i in $(seq 5 10); do
pkill -USR1 omprog-test-bin
sleep .1
. $srcdir/diag.sh injectmsg $i 1
sleep .1
done
sleep 1
sleep .5
. $srcdir/diag.sh content-check "msgnum:00000099:"
. $srcdir/diag.sh content-check "msgnum:00000009:"
new_fd_count=$(lsof -p $pid | wc -l)
echo OLD: $old_fd_count NEW: $new_fd_count

View File

@ -14,18 +14,22 @@ sleep 1
old_fd_count=$(lsof -p $pid | wc -l)
for i in $(seq 5 100); do
for i in $(seq 5 10); do
pkill -USR1 omprog-test-bin
sleep .1
. $srcdir/diag.sh injectmsg $i 1
sleep .1
done
sleep 1
sleep .5
. $srcdir/diag.sh content-check "msgnum:00000099:"
. $srcdir/diag.sh content-check "msgnum:00000009:"
new_fd_count=$(lsof -p $pid | wc -l)
echo OLD: $old_fd_count NEW: $new_fd_count
. $srcdir/diag.sh assert-equal $old_fd_count $new_fd_count 2
cp rsyslog.out.log /tmp/
echo doing shutdown
. $srcdir/diag.sh shutdown-when-empty
echo wait on shutdown

View File

@ -14,13 +14,15 @@ sleep 1
old_fd_count=$(lsof -p $pid | wc -l)
for i in $(seq 5 100); do
for i in $(seq 5 10); do
pkill -USR1 omprog-test-bin
sleep .1
. $srcdir/diag.sh injectmsg $i 1
sleep .1
done
sleep 1
sleep .5
. $srcdir/diag.sh content-check "msgnum:00000099:"
. $srcdir/diag.sh content-check "msgnum:00000009:"
new_fd_count=$(lsof -p $pid | wc -l)
echo OLD: $old_fd_count NEW: $new_fd_count

View File

@ -1534,8 +1534,8 @@ mainloop(void)
pid_t child;
do {
child = waitpid(-1, NULL, WNOHANG);
DBGPRINTF("rsyslogd: child %d has terminated\n", child);
if (child != -1) {
DBGPRINTF("rsyslogd: mainloop waitpid (with-no-hang) returned %d\n", child);
if (child != -1 && child != 0) {
errmsg.LogError(0, RS_RET_OK, "Child %d has terminated, reaped by main-loop.", child);
}
} while(child > 0);