some cleanup, some additional comments and a bit more debug output

This commit is contained in:
Rainer Gerhards 2010-03-18 12:34:26 +01:00
parent 5c7225b776
commit 1fb45d3e99
5 changed files with 24 additions and 19 deletions

View File

@ -316,7 +316,8 @@ static rsRetVal strmCloseFile(strm_t *pThis)
DEFiRet;
ASSERT(pThis != NULL);
DBGOPRINT((obj_t*) pThis, "file %d closing\n", pThis->fd);
DBGOPRINT((obj_t*) pThis, "file %d(%s) closing\n", pThis->fd,
(pThis->pszFName == NULL) ? "N/A" : (char*)pThis->pszFName);
if(pThis->tOperationsMode != STREAMMODE_READ) {
strmFlush(pThis);
@ -929,6 +930,7 @@ asyncWriterThread(void *pPtr)
while(1) { /* loop broken inside */
d_pthread_mutex_lock(&pThis->mut);
dbgprintf("XXX: asyncWriterThread iterating %s\n", pThis->pszFName);
while(pThis->iCnt == 0) {
if(pThis->bStopWriter) {
pthread_cond_broadcast(&pThis->isEmpty);
@ -944,6 +946,7 @@ asyncWriterThread(void *pPtr)
bTimedOut = 0;
timeoutComp(&t, pThis->iFlushInterval * 2000); /* *1000 millisconds */
if(pThis->bDoTimedWait) {
dbgprintf("asyncWriter thread going to timeout sleep\n");
if(pthread_cond_timedwait(&pThis->notEmpty, &pThis->mut, &t) != 0) {
int err = errno;
if(err == ETIMEDOUT) {
@ -957,13 +960,16 @@ asyncWriterThread(void *pPtr)
}
}
} else {
dbgprintf("asyncWriter thread going to eternal sleep\n");
d_pthread_cond_wait(&pThis->notEmpty, &pThis->mut);
}
dbgprintf("asyncWriter woke up\n");
}
bTimedOut = 0; /* we may have timed out, but there *is* work to do... */
iDeq = pThis->iDeq++ % STREAM_ASYNC_NUMBUFS;
dbgprintf("asyncWriter writes data\n");
doWriteInternal(pThis, pThis->asyncBuf[iDeq].pBuf, pThis->asyncBuf[iDeq].lenBuf);
// TODO: error check????? 2009-07-06
@ -1135,7 +1141,10 @@ strmFlush(strm_t *pThis)
DEFiRet;
ASSERT(pThis != NULL);
DBGOPRINT((obj_t*) pThis, "file %d flush, buflen %ld\n", pThis->fd, (long) pThis->iBufPtr);
DBGOPRINT((obj_t*) pThis, "file %d(%s) flush, buflen %ld\n", pThis->fd, pThis->pszFName, (long) pThis->iBufPtr);
DBGOPRINT((obj_t*) pThis, "file %d(%s) flush, buflen %ld%s\n", pThis->fd,
(pThis->pszFName == NULL) ? "N/A" : (char*)pThis->pszFName,
(long) pThis->iBufPtr, (pThis->iBufPtr == 0) ? " (no need to flush)" : "");
if(pThis->tOperationsMode != STREAMMODE_READ && pThis->iBufPtr > 0) {
iRet = strmSchedWrite(pThis, pThis->pIOBuf, pThis->iBufPtr);

View File

@ -8,8 +8,8 @@ echo ===========================================================================
echo TEST: \[asynwr_deadlock.sh\]: a case known to have caused a deadlock in the past
source $srcdir/diag.sh init
# uncomment for debugging support:
export RSYSLOG_DEBUG="debug nostdout noprintmutexaction"
export RSYSLOG_DEBUGLOG="log"
#export RSYSLOG_DEBUG="debug nostdout noprintmutexaction"
#export RSYSLOG_DEBUGLOG="log"
source $srcdir/diag.sh startup asynwr_deadlock.conf
# just send one message
source $srcdir/diag.sh tcpflood -m1

View File

@ -20,10 +20,14 @@
# cirumstances. As far as the analysis goes, the following need to happen:
# 1. buffers on that file are being flushed
# 2. no new data arrives
# 3. the inactivity timeout has not yet happend
# 3. the inactivity timeout has not yet expired
# 4. *then* (and only then) the stream is closed or destructed
# In that, 1 to 3 are prequisites for the deadlock which will happen in 4. In order
# to create this case reliably, I have used the "$OMFileFlushOnTXEnd on" directive
# In that, 1 to 4 are prequisites for the deadlock which will happen in 4. However,
# for it to happen, we also need the right "timing". There is a race between the
# main thread and the async writer thread. The deadlock will only happen under
# the "right" circumstances, which basically means it will not happen always.
# In order to create this case as reliable as possible, I have used
# the "$OMFileFlushOnTXEnd on" directive
# inside my test case. It makes sure that #1 above happens. The test uses a dynafile
# cache size of 4, and the load generator generates data for 5 different dynafiles.
# So over time, we will hit a spot where 4 dynafiles are open and the 5th file name
@ -50,8 +54,8 @@ echo ===========================================================================
echo TEST: \[asynwr_deadlock2.sh\]: a case known to have caused a deadlock in the past
source $srcdir/diag.sh init
# uncomment for debugging support:
export RSYSLOG_DEBUG="debug nostdout noprintmutexaction"
export RSYSLOG_DEBUGLOG="log"
#export RSYSLOG_DEBUG="debug nostdout noprintmutexaction"
#export RSYSLOG_DEBUGLOG="log"
source $srcdir/diag.sh startup asynwr_deadlock2.conf
# send 20000 messages, each close to 2K (non-randomized!), so that we can fill
# the buffers and hopefully run into the "deadlock".
@ -59,6 +63,6 @@ source $srcdir/diag.sh tcpflood -m20000 -d1800 -P129 -i1 -f5
# sleep is important! need to make sure the instance is inactive
source $srcdir/diag.sh shutdown-when-empty # shut down rsyslogd when done processing messages
source $srcdir/diag.sh wait-shutdown # and wait for it to terminate
zcat rsyslog.out.*.log > rsyslog.out.log
cat rsyslog.out.*.log > rsyslog.out.log
source $srcdir/diag.sh seq-check 1 20000 -E
source $srcdir/diag.sh exit

View File

@ -1,4 +1,3 @@
# simple async writing test
# rgerhards, 2010-03-09
$IncludeConfig diag-common.conf
@ -7,15 +6,8 @@ $MainMsgQueueTimeoutShutdown 10000
$InputTCPServerRun 13514
$template outfmt,"%msg:F,58:2%\n"
$template dynfile,"rsyslog.out.log" # trick to use relative path names!
$OMFileFlushOnTXEnd on
$OMFileFlushInterval 10
$OMFileFlushIOBufferSize 10k
#:msg, contains, "msgnum:" ?dynfile;outfmt
:msg, contains, "msgnum:" ./rsyslog.out.log;outfmt
$OMFileFlushOnTXEnd on
$OMFileFlushInterval 10
$OMFileFlushIOBufferSize 10k
#:msg, contains, "msgnum:" ./rsyslog.out.2.log;outfmt

View File

@ -8,7 +8,7 @@ $MainMsgQueueTimeoutShutdown 10000
$InputTCPServerRun 13514
$template outfmt,"%msg:F,58:3%,%msg:F,58:4%,%msg:F,58:5%\n"
$template dynfile,"rsyslog.out.%msg:F,58:2%.log" # trick to use relative path names!
$template dynfile,"rsyslog.out.%msg:F,58:2%.log" # use multiple dynafiles
$OMFileFlushOnTXEnd off
$OMFileZipLevel 6
$OMFileIOBufferSize 256k