core bugfix: potential segfault when shutting down rsyslog

when rulesets are nested a segfault can occur when shutting down
rsyslog. the reason is that rule sets are destructed in load order,
which means a "later" ruleset may still be active when an "earlier"
one was already destructed. In these cases, a "call" can invalidly
call into the earlier ruleset, which is destructed and so leads to
invalid memory access. If a segfault actually happens depends on the
OS, but it is highly probable.

The cure is to split the queue shutdown sequence. In a first step,
all worker threads are terminated and the queue set to enqOnly.
While some are terminated, it is still possible that the others
enqueue messages into the queue (which are then just placed into the
queue, not processed). After this happens, a call can no longer
be issued (as there are no more workers). So then we can destruct
the rulesets in any order.

closes https://github.com/rsyslog/rsyslog/issues/1122
This commit is contained in:
Rainer Gerhards 2017-10-18 09:20:49 +02:00
parent 84b980d581
commit 3fbd901b3e
4 changed files with 26 additions and 4 deletions

View File

@ -1336,8 +1336,8 @@ cancelWorkers(qqueue_t *pThis)
* longer, because we no longer can persist the queue in parallel to waiting
* on worker timeouts.
*/
static rsRetVal
ShutdownWorkers(qqueue_t *pThis)
rsRetVal
qqueueShutdownWorkers(qqueue_t *const pThis)
{
DEFiRet;
@ -2653,7 +2653,7 @@ CODESTARTobjDestruct(qqueue)
*/
if(pThis->qType != QUEUETYPE_DIRECT && !pThis->bEnqOnly && pThis->pqParent == NULL
&& pThis->pWtpReg != NULL)
ShutdownWorkers(pThis);
qqueueShutdownWorkers(pThis);
if(pThis->bIsDA && getPhysicalQueueSize(pThis) > 0 && pThis->bSaveOnShutdown) {
CHKiRet(DoSaveOnShutdown(pThis));

View File

@ -207,6 +207,7 @@ rsRetVal qqueueApplyCnfParam(qqueue_t *pThis, struct nvlst *lst);
void qqueueSetDefaultsRulesetQueue(qqueue_t *pThis);
void qqueueSetDefaultsActionQueue(qqueue_t *pThis);
void qqueueDbgPrint(qqueue_t *pThis);
rsRetVal qqueueShutdownWorkers(qqueue_t *pThis);
PROTOTYPEObjClassInit(qqueue);
PROTOTYPEpropSetMeth(qqueue, iPersistUpdCnt, int);

View File

@ -226,7 +226,6 @@ freeCnf(rsconf_t *pThis)
}
}
/* destructor for the rsconf object */
PROTOTYPEobjDestruct(rsconf);
BEGINobjDestruct(rsconf) /* be sure to specify the object type also in END and CODESTART macros! */

View File

@ -826,6 +826,19 @@ CODESTARTobjDestruct(ruleset)
ENDobjDestruct(ruleset)
/* helper for Destructor, shut down queue workers */
DEFFUNC_llExecFunc(doShutdownQueueWorkers)
{
DEFiRet;
ruleset_t *const pThis = (ruleset_t*) pData;
DBGPRINTF("shutting down queue workers for ruleset %p, name %s, queue %p\n",
pThis, pThis->pszName, pThis->pQueue);
ISOBJ_TYPE_assert(pThis, ruleset);
if(pThis->pQueue != NULL) {
qqueueShutdownWorkers(pThis->pQueue);
}
RETiRet;
}
/* destruct ALL rule sets that reside in the system. This must
* be callable before unloading this module as the module may
* not be unloaded before unload of the actions is required. This is
@ -837,6 +850,15 @@ destructAllActions(rsconf_t *conf)
{
DEFiRet;
DBGPRINTF("rulesetDestructAllActions\n");
/* we first need to stop all queue workers, else we
* may run into trouble with "call" statements calling
* into then-destroyed rulesets.
* see: https://github.com/rsyslog/rsyslog/issues/1122
*/
DBGPRINTF("RRRRRR: rsconfDestruct - queue shutdown\n");
llExecFunc(&(conf->rulesets.llRulesets), doShutdownQueueWorkers, NULL);
CHKiRet(llDestroy(&(conf->rulesets.llRulesets)));
CHKiRet(llInit(&(conf->rulesets.llRulesets), rulesetDestructForLinkedList, rulesetKeyDestruct, strcasecmp));
conf->rulesets.pDflt = NULL;