mirror of
https://github.com/rsyslog/rsyslog.git
synced 2026-06-19 17:32:56 +02:00
queue: harden disk recovery after invalid .qi
Hardens disk-queue recovery after an invalid .qi so read/write pointers realign and on-disk size is corrected. This prevents stuck queues and stabilizes the daqueue dirty shutdown test. Bug Fixes - On anomaly (rd==wr and offsets equal), seek the read-delete cursor to the writer, subtract deleted bytes from sizeOnDisk, and align the read-dequeue cursor; keep draining if seek fails. - Log errors when pointer resets or seeks fail. - Add strm.Sync() to keep stream state consistent after pointer updates. - Refactor invalid .qi recovery and startup seek errors into helpers. - When spool read files are missing on startup, align read to write and continue recovery. With the help of AI-Agents: gpt-5.2-codex
This commit is contained in:
parent
526702e15c
commit
08027ecff5
101
runtime/queue.c
101
runtime/queue.c
@ -243,6 +243,9 @@ static rsRetVal qDestructDirect(qqueue_t __attribute__((unused)) * pThis);
|
|||||||
static rsRetVal qConstructDirect(qqueue_t __attribute__((unused)) * pThis);
|
static rsRetVal qConstructDirect(qqueue_t __attribute__((unused)) * pThis);
|
||||||
static rsRetVal qDestructDisk(qqueue_t *pThis);
|
static rsRetVal qDestructDisk(qqueue_t *pThis);
|
||||||
rsRetVal qqueueSetSpoolDir(qqueue_t *pThis, uchar *pszSpoolDir, int lenSpoolDir);
|
rsRetVal qqueueSetSpoolDir(qqueue_t *pThis, uchar *pszSpoolDir, int lenSpoolDir);
|
||||||
|
static rsRetVal handleReadSeekError(rsRetVal seekRet, qqueue_t *pThis, const char *streamName, sbool *pReadSeekFailed);
|
||||||
|
static void alignReadDeqToWrite(qqueue_t *pThis);
|
||||||
|
static void recoverFromInvalidQi(qqueue_t *pThis, int wr_fd, int64_t wr_offs);
|
||||||
|
|
||||||
/* some constants for queuePersist () */
|
/* some constants for queuePersist () */
|
||||||
#define QUEUE_CHECKPOINT 1
|
#define QUEUE_CHECKPOINT 1
|
||||||
@ -972,8 +975,15 @@ static rsRetVal qqueueTryLoadPersistedInfo(qqueue_t *pThis) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pWrite));
|
CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pWrite));
|
||||||
CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pReadDel));
|
rsRetVal seekRet = strm.SeekCurrOffs(pThis->tVars.disk.pReadDel);
|
||||||
CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pReadDeq));
|
sbool read_seek_failed = 0;
|
||||||
|
CHKiRet(handleReadSeekError(seekRet, pThis, "read/delete", &read_seek_failed));
|
||||||
|
seekRet = strm.SeekCurrOffs(pThis->tVars.disk.pReadDeq);
|
||||||
|
CHKiRet(handleReadSeekError(seekRet, pThis, "read", &read_seek_failed));
|
||||||
|
if (read_seek_failed) {
|
||||||
|
/* Align read pointer to write pointer to trigger recovery later. */
|
||||||
|
alignReadDeqToWrite(pThis);
|
||||||
|
}
|
||||||
|
|
||||||
/* OK, we could successfully read the file, so we now can request that it be
|
/* OK, we could successfully read the file, so we now can request that it be
|
||||||
* deleted when we are done with the persisted information.
|
* deleted when we are done with the persisted information.
|
||||||
@ -1704,6 +1714,92 @@ finalize_it:
|
|||||||
RETiRet;
|
RETiRet;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static rsRetVal handleReadSeekError(rsRetVal seekRet,
|
||||||
|
qqueue_t *pThis,
|
||||||
|
const char *streamName,
|
||||||
|
sbool *const pReadSeekFailed) {
|
||||||
|
assert(pReadSeekFailed != NULL);
|
||||||
|
if (seekRet == RS_RET_OK) {
|
||||||
|
return RS_RET_OK;
|
||||||
|
}
|
||||||
|
if (seekRet == RS_RET_FILE_NOT_FOUND) {
|
||||||
|
LogError(0, seekRet, "%s: disk queue %s file missing on startup", obj.GetName((obj_t *)pThis), streamName);
|
||||||
|
*pReadSeekFailed = 1;
|
||||||
|
return RS_RET_OK;
|
||||||
|
}
|
||||||
|
return seekRet;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void alignReadDeqToWrite(qqueue_t *pThis) {
|
||||||
|
if (pThis->tVars.disk.pReadDeq == NULL || pThis->tVars.disk.pWrite == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const rsRetVal syncRet = strm.Sync(pThis->tVars.disk.pReadDeq, pThis->tVars.disk.pWrite);
|
||||||
|
if (syncRet != RS_RET_OK) {
|
||||||
|
LogError(0, syncRet, "%s: could not sync disk queue read pointer to write pointer",
|
||||||
|
obj.GetName((obj_t *)pThis));
|
||||||
|
}
|
||||||
|
const rsRetVal seekRet = strm.SeekCurrOffs(pThis->tVars.disk.pReadDeq);
|
||||||
|
if (seekRet != RS_RET_OK) {
|
||||||
|
LogError(0, seekRet, "%s: could not seek disk queue read pointer after startup recovery hint",
|
||||||
|
obj.GetName((obj_t *)pThis));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void recoverFromInvalidQi(qqueue_t *pThis, const int wr_fd, const int64_t wr_offs) {
|
||||||
|
if (pThis->tVars.disk.pReadDel == NULL || pThis->tVars.disk.pWrite == NULL || wr_fd < 0 || wr_offs < 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
off64_t bytesDel = 0;
|
||||||
|
int reset_done = 0;
|
||||||
|
const unsigned int del_fnum = pThis->tVars.disk.pReadDel->iCurrFNum;
|
||||||
|
if (del_fnum > (unsigned int)wr_fd) {
|
||||||
|
LogError(0, RS_RET_ERR,
|
||||||
|
"%s: invalid .qi file; delete stream ahead of write stream "
|
||||||
|
"(del_fnum=%u, wr_fd=%d); keeping delete pointer",
|
||||||
|
obj.GetName((obj_t *)pThis), del_fnum, wr_fd);
|
||||||
|
reset_done = 1;
|
||||||
|
} else {
|
||||||
|
const rsRetVal delRet = strmMultiFileSeek(pThis->tVars.disk.pReadDel, (unsigned int)wr_fd, wr_offs, &bytesDel);
|
||||||
|
if (delRet != RS_RET_OK) {
|
||||||
|
LogError(0, delRet, "%s: could not reset disk queue pointers after invalid .qi file",
|
||||||
|
obj.GetName((obj_t *)pThis));
|
||||||
|
} else {
|
||||||
|
if (bytesDel != 0) {
|
||||||
|
if (pThis->tVars.disk.sizeOnDisk >= bytesDel) {
|
||||||
|
pThis->tVars.disk.sizeOnDisk -= bytesDel;
|
||||||
|
} else {
|
||||||
|
pThis->tVars.disk.sizeOnDisk = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
reset_done = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (reset_done && pThis->tVars.disk.pReadDeq != NULL) {
|
||||||
|
const rsRetVal syncRet = strm.Sync(pThis->tVars.disk.pReadDeq, pThis->tVars.disk.pReadDel);
|
||||||
|
if (syncRet != RS_RET_OK) {
|
||||||
|
LogError(0, syncRet, "%s: could not sync disk queue read pointer after invalid .qi file",
|
||||||
|
obj.GetName((obj_t *)pThis));
|
||||||
|
}
|
||||||
|
/* Non-fatal: queue already reset; keep draining if seek fails. */
|
||||||
|
const rsRetVal seekRet = strm.SeekCurrOffs(pThis->tVars.disk.pReadDeq);
|
||||||
|
if (seekRet != RS_RET_OK) {
|
||||||
|
LogError(0, seekRet, "%s: could not seek disk queue read pointer after invalid .qi file",
|
||||||
|
obj.GetName((obj_t *)pThis));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (reset_done) {
|
||||||
|
const rsRetVal persistRet = qqueuePersist(pThis, QUEUE_CHECKPOINT);
|
||||||
|
if (persistRet != RS_RET_OK) {
|
||||||
|
LogError(0, persistRet, "%s: could not persist disk queue after invalid .qi reset",
|
||||||
|
obj.GetName((obj_t *)pThis));
|
||||||
|
} else {
|
||||||
|
pThis->iUpdsSincePersist = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Finally remove n elements from the queue store.
|
/* Finally remove n elements from the queue store.
|
||||||
*/
|
*/
|
||||||
@ -1936,6 +2032,7 @@ static rsRetVal ATTR_NONNULL() DequeueConsumableElements(qqueue_t *const pThis,
|
|||||||
iOverallQueueSize -= iQueueSize;
|
iOverallQueueSize -= iQueueSize;
|
||||||
#endif
|
#endif
|
||||||
pThis->iQueueSize -= iQueueSize;
|
pThis->iQueueSize -= iQueueSize;
|
||||||
|
recoverFromInvalidQi(pThis, wr_fd, wr_offs);
|
||||||
iQueueSize = 0;
|
iQueueSize = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2322,6 +2322,19 @@ static rsRetVal strmGetCurrOffset(strm_t *pThis, int64 *pOffs) {
|
|||||||
RETiRet;
|
RETiRet;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static rsRetVal strmSync(strm_t *pStrmDest, const strm_t *pStrmSrc) {
|
||||||
|
DEFiRet;
|
||||||
|
|
||||||
|
ISOBJ_TYPE_assert(pStrmDest, strm);
|
||||||
|
ISOBJ_TYPE_assert(pStrmSrc, strm);
|
||||||
|
|
||||||
|
pStrmDest->iCurrFNum = pStrmSrc->iCurrFNum;
|
||||||
|
pStrmDest->iCurrOffs = pStrmSrc->iCurrOffs;
|
||||||
|
pStrmDest->strtOffs = pStrmSrc->strtOffs;
|
||||||
|
|
||||||
|
RETiRet;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* queryInterface function
|
/* queryInterface function
|
||||||
* rgerhards, 2008-02-29
|
* rgerhards, 2008-02-29
|
||||||
@ -2356,6 +2369,7 @@ BEGINobjQueryInterface(strm)
|
|||||||
pIf->Serialize = strmSerialize;
|
pIf->Serialize = strmSerialize;
|
||||||
pIf->GetCurrOffset = strmGetCurrOffset;
|
pIf->GetCurrOffset = strmGetCurrOffset;
|
||||||
pIf->Dup = strmDup;
|
pIf->Dup = strmDup;
|
||||||
|
pIf->Sync = strmSync;
|
||||||
pIf->SetCompressionWorkers = SetCompressionWorkers;
|
pIf->SetCompressionWorkers = SetCompressionWorkers;
|
||||||
pIf->SetWCntr = strmSetWCntr;
|
pIf->SetWCntr = strmSetWCntr;
|
||||||
pIf->CheckFileChange = CheckFileChange;
|
pIf->CheckFileChange = CheckFileChange;
|
||||||
|
|||||||
@ -200,6 +200,7 @@ BEGINinterface(strm) /* name must also be changed in ENDinterface macro! */
|
|||||||
rsRetVal (*GetCurrOffset)(strm_t *pThis, int64 *pOffs);
|
rsRetVal (*GetCurrOffset)(strm_t *pThis, int64 *pOffs);
|
||||||
rsRetVal (*SetWCntr)(strm_t *pThis, number_t *pWCnt);
|
rsRetVal (*SetWCntr)(strm_t *pThis, number_t *pWCnt);
|
||||||
rsRetVal (*Dup)(strm_t *pThis, strm_t **ppNew);
|
rsRetVal (*Dup)(strm_t *pThis, strm_t **ppNew);
|
||||||
|
rsRetVal (*Sync)(strm_t *pStrmDest, const strm_t *pStrmSrc);
|
||||||
rsRetVal (*SetCompressionWorkers)(strm_t *const pThis, int num_wrkrs);
|
rsRetVal (*SetCompressionWorkers)(strm_t *const pThis, int num_wrkrs);
|
||||||
INTERFACEpropSetMeth(strm, bDeleteOnClose, int);
|
INTERFACEpropSetMeth(strm, bDeleteOnClose, int);
|
||||||
INTERFACEpropSetMeth(strm, iMaxFileSize, int64);
|
INTERFACEpropSetMeth(strm, iMaxFileSize, int64);
|
||||||
@ -227,7 +228,7 @@ BEGINinterface(strm) /* name must also be changed in ENDinterface macro! */
|
|||||||
INTERFACEpropSetMeth(strm, cryprov, cryprov_if_t *);
|
INTERFACEpropSetMeth(strm, cryprov, cryprov_if_t *);
|
||||||
INTERFACEpropSetMeth(strm, cryprovData, void *);
|
INTERFACEpropSetMeth(strm, cryprovData, void *);
|
||||||
ENDinterface(strm)
|
ENDinterface(strm)
|
||||||
#define strmCURR_IF_VERSION 14 /* increment whenever you change the interface structure! */
|
#define strmCURR_IF_VERSION 15 /* increment whenever you change the interface structure! */
|
||||||
/* V10, 2013-09-10: added new parameter bEscapeLF, changed mode to uint8_t (rgerhards) */
|
/* V10, 2013-09-10: added new parameter bEscapeLF, changed mode to uint8_t (rgerhards) */
|
||||||
/* V11, 2015-12-03: added new parameter bReopenOnTruncate */
|
/* V11, 2015-12-03: added new parameter bReopenOnTruncate */
|
||||||
/* V12, 2015-12-11: added new parameter trimLineOverBytes, changed mode to uint32_t */
|
/* V12, 2015-12-11: added new parameter trimLineOverBytes, changed mode to uint32_t */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user