imfile bugfix: file reader could get stuck

State file handling was invalid. When a file was moved and re-created
rsyslog could use the file_id if the new file to write the old files'
state file. This could make the file reader stuck until it reached the
previous offset. Depending on file sizes this could never happen AND
would cause large message loss. This situation was timing dependent
(a race) and most frequently occurred under log rotation. In polling
mode the bug was less likely, but could also occur.

closes: https://github.com/rsyslog/rsyslog/issues/3465
closes: https://github.com/rsyslog/rsyslog/issues/3468
This commit is contained in:
Rainer Gerhards 2019-02-28 13:02:54 +01:00
parent 7d456466bb
commit ee77f27db0
No known key found for this signature in database
GPG Key ID: 0CB6B2A8BE80B499
7 changed files with 237 additions and 31 deletions

View File

@ -180,8 +180,10 @@ struct act_obj_s {
#endif
time_t timeoutBase; /* what time to calculate the timeout against? */
/* file dynamic data */
char file_id[FILE_ID_HASH_SIZE]; /* file id for this entry, once we could obtain it */
int in_move; /* workaround for inotify move: if set, state file must not be deleted */
ino_t ino; /* current inode nbr */
int fd; /* fd to file in order to obtain file_id (needs to be preserved across move) */
strm_t *pStrm; /* its stream (NULL if not assigned) */
int nRecords; /**< How many records did we process before persisting the stream? */
ratelimit_t *ratelimiter;
@ -682,6 +684,12 @@ act_obj_add(fs_edge_t *const edge, const char *const name, const int is_file,
}
}
}
DBGPRINTF("need to add new active object '%s' in '%s' - checking if accessible\n", name, edge->path);
const int fd = open(name, O_RDONLY | O_CLOEXEC);
if(fd < 0) {
LogMsg(errno, RS_RET_ERR, LOG_WARNING, "imfile: error accessing file '%s'", name);
FINALIZE;
}
DBGPRINTF("add new active object '%s' in '%s'\n", name, edge->path);
CHKmalloc(act = calloc(sizeof(act_obj_t), 1));
CHKmalloc(act->name = strdup(name));
@ -692,6 +700,8 @@ act_obj_add(fs_edge_t *const edge, const char *const name, const int is_file,
}
act->edge = edge;
act->ino = ino;
act->fd = fd;
act->file_id[0] = '\0';
act->is_symlink = is_symlink;
if (source) { /* we are target of symlink */
CHKmalloc(act->source_name = strdup(source));
@ -957,6 +967,9 @@ act_obj_destroy(act_obj_t *const act, const int is_deleted)
wdmapDel(act->wd);
}
#endif
if(act->fd >= 0) {
close(act->fd);
}
#if defined(OS_SOLARIS) && defined (HAVE_PORT_SOURCE_FILE)
if(act->pfinf != NULL) {
free(act->pfinf->fobj.fo_name);
@ -1210,24 +1223,24 @@ get_file_id_hash(const char *data, size_t lendata,
/* this returns the file-id for a given file
*/
static void ATTR_NONNULL(1, 2)
getFileID(const act_obj_t *const act, char *const buf, const size_t lenbuf)
static void ATTR_NONNULL(1)
getFileID(act_obj_t *const act)
{
*buf = '\0'; /* default: empty hash, only set if file has sufficient data */
const int fd = open(act->name, O_RDONLY | O_CLOEXEC);
if(fd >= 0) {
char filedata[FILE_ID_SIZE];
const int r = read(fd, filedata, FILE_ID_SIZE);
if(r == FILE_ID_SIZE) {
get_file_id_hash(filedata, sizeof(filedata), buf, lenbuf);
} else {
DBGPRINTF("getFileID partial or error read, ret %d\n", r);
}
close(fd);
} else {
DBGPRINTF("getFileID open %s failed\n", act->name);
if(act->file_id[0] != '\0') {
return; /* everything already done */
}
DBGPRINTF("getFileID for '%s', file_id_hash '%s'\n", act->name, buf);
assert(act->fd >= 0); /* fd must have been opened at act_obj_t creation! */
char filedata[FILE_ID_SIZE];
const int r = read(act->fd, filedata, FILE_ID_SIZE);
if(r == FILE_ID_SIZE) {
get_file_id_hash(filedata, sizeof(filedata), act->file_id, sizeof(act->file_id));
dbgprintf("file_id '%s' obtained, closing monitoring file handle\n", act->file_id);
close(act->fd); /* we will never go here! */
act->fd = -1;
} else {
DBGPRINTF("getFileID partial or error read, ret %d\n", r);
}
DBGPRINTF("getFileID for '%s', file_id_hash '%s'\n", act->name, act->file_id);
}
/* this generates a state file name suitable for the given file. To avoid
@ -1244,7 +1257,7 @@ getStateFileName(const act_obj_t *const act,
{
DBGPRINTF("getStateFileName for '%s'\n", act->name);
snprintf((char*)buf, lenbuf - 1, "imfile-state:%lld", (long long) act->ino);
DBGPRINTF("getStateFileName: stat file name now is %s\n", buf);
DBGPRINTF("getStateFileName: state file name now is %s\n", buf);
return buf;
}
@ -1321,21 +1334,20 @@ openFileWithStateFile(act_obj_t *const act)
DEFiRet;
uchar pszSFNam[MAXFNAME];
uchar statefile[MAXFNAME];
char file_id[FILE_ID_HASH_SIZE];
int fd = -1;
const instanceConf_t *const inst = act->edge->instarr[0];// TODO: same file, multiple instances?
uchar *const statefn = getStateFileName(act, statefile, sizeof(statefile));
getFileID(act, file_id, sizeof(file_id));
getFileID(act);
getFullStateFileName(statefn, file_id, pszSFNam, sizeof(pszSFNam));
getFullStateFileName(statefn, act->file_id, pszSFNam, sizeof(pszSFNam));
DBGPRINTF("trying to open state for '%s', state file '%s'\n", act->name, pszSFNam);
/* check if the file exists */
fd = open((char*)pszSFNam, O_CLOEXEC | O_NOCTTY | O_RDONLY, 0600);
if(fd < 0) {
if(errno == ENOENT) {
if(file_id[0] != '\0') {
if(act->file_id[0] != '\0') {
const char *pszSFNamHash = strdup((const char*)pszSFNam);
CHKmalloc(pszSFNamHash);
DBGPRINTF("state file %s for %s does not exist - trying to see if "
@ -1343,6 +1355,8 @@ openFileWithStateFile(act_obj_t *const act)
getFullStateFileName(statefn, "", pszSFNam, sizeof(pszSFNam));
fd = open((char*)pszSFNam, O_CLOEXEC | O_NOCTTY | O_RDONLY, 0600);
if(fd >= 0) {
dbgprintf("found inode-only state file, renaming it now that we "
"know the file_id, new name: %s\n", pszSFNamHash);
/* we now can use identify the file, so let's rename it */
if(rename((const char*)pszSFNam, pszSFNamHash) != 0) {
LogError(errno, RS_RET_IO_ERROR,
@ -1509,6 +1523,7 @@ pollFileReal(act_obj_t *act, cstr_t **pCStr)
{
int64 strtOffs;
DEFiRet;
int64_t startOffs = 0;
int nProcessed = 0;
regex_t *start_preg = NULL, *end_preg = NULL;
@ -1525,6 +1540,7 @@ pollFileReal(act_obj_t *act, cstr_t **pCStr)
start_preg = (inst->startRegex == NULL) ? NULL : &inst->start_preg;
end_preg = (inst->endRegex == NULL) ? NULL : &inst->end_preg;
startOffs = act->pStrm->iCurrOffs;
/* loop below will be exited when strmReadLine() returns EOF */
while(glbl.GetGlobalInputTermState() == 0) {
if(inst->maxLinesAtOnce != 0 && nProcessed >= inst->maxLinesAtOnce)
@ -1537,6 +1553,12 @@ pollFileReal(act_obj_t *act, cstr_t **pCStr)
inst->escapeLF, inst->discardTruncatedMsg, inst->msgDiscardingError, &strtOffs));
}
++nProcessed;
if(startOffs < FILE_ID_SIZE && act->pStrm->iCurrOffs >= FILE_ID_SIZE) {
dbgprintf("initiating state file write as sufficient data is now present; file=%s\n",
act->name);
persistStrmState(act);
startOffs = act->pStrm->iCurrOffs; /* disable check */
}
runModConf->bHadFileData = 1; /* this is just a flag, so set it and forget it */
CHKiRet(enqLine(act, *pCStr, strtOffs)); /* process line */
rsCStrDestruct(pCStr); /* discard string (must be done by us!) */
@ -2549,13 +2571,12 @@ static rsRetVal ATTR_NONNULL()
persistStrmState(act_obj_t *const act)
{
DEFiRet;
char file_id[FILE_ID_HASH_SIZE];
uchar statefile[MAXFNAME];
uchar statefname[MAXFNAME];
uchar *const statefn = getStateFileName(act, statefile, sizeof(statefile));
getFileID(act, file_id, sizeof(file_id));
getFullStateFileName(statefn, file_id, statefname, sizeof(statefname));
getFileID(act);
getFullStateFileName(statefn, act->file_id, statefname, sizeof(statefname));
DBGPRINTF("persisting state for '%s', state file '%s'\n", act->name, statefname);
struct json_object *jval = NULL;

View File

@ -10,7 +10,7 @@ CLEANFILES=\
work \
test-spool test-logdir stat-file1 \
rsyslog.pipe rsyslog.input.* \
rsyslog.input rsyslog.input.* imfile-state* omkafka-failed.data \
rsyslog.input rsyslog.input.* imfile-state:* omkafka-failed.data \
rsyslog.input-symlink.log rsyslog-link.*.log targets \
HOSTNAME \
rstb_* \
@ -1159,6 +1159,8 @@ TESTS += \
imfile-endregex-timeout-with-shutdown.sh \
imfile-endregex-timeout-with-shutdown-polling.sh \
imfile-endmsg.regex.sh \
imfile-statefile-no-file_id.sh \
imfile-statefile-no-file_id-TO-file_id.sh \
imfile-persist-state-1.sh \
imfile-freshStartTail1.sh \
imfile-freshStartTail2.sh \
@ -1178,6 +1180,7 @@ TESTS += \
imfile-symlink.sh \
imfile-symlink-multi.sh \
imfile-logrotate.sh \
imfile-logrotate-multiple.sh \
imfile-logrotate-copytruncate.sh \
imfile-logrotate-nocopytruncate.sh \
imfile-growing-file-id.sh \
@ -1820,6 +1823,8 @@ EXTRA_DIST= \
imfile-endmsg.regex-with-example-vg.sh \
imfile-endmsg.regex.crio.rulebase \
imfile-endmsg.regex.json.rulebase \
imfile-statefile-no-file_id.sh \
imfile-statefile-no-file_id-TO-file_id.sh \
imfile-persist-state-1.sh \
imfile-freshStartTail1.sh \
imfile-freshStartTail2.sh \
@ -1843,6 +1848,7 @@ EXTRA_DIST= \
imfile-logrotate.sh \
imfile-logrotate-copytruncate.sh \
imfile-logrotate-nocopytruncate.sh \
imfile-logrotate-multiple.sh \
imfile-growing-file-id.sh \
glbl-oversizeMsg-truncate-imfile.sh \
dynfile_invld_async.sh \

View File

@ -1285,7 +1285,7 @@ exit_test() {
rm -f rsyslog.action.*.include
rm -f work rsyslog.out.* xlate*.lkp_tbl
rm -rf test-logdir stat-file1
rm -f rsyslog.conf.tlscert stat-file1 rsyslog.empty imfile-state*
rm -f rsyslog.conf.tlscert stat-file1 rsyslog.empty imfile-state:*
rm -rf rsyslog-link.*.log targets
rm -f ${TESTCONF_NM}.conf
rm -f tmp.qi nocert
@ -1316,7 +1316,7 @@ get_inode() {
printf 'FAIL: file "%s" does not exist in get_inode\n' "$1"
error_exit 100
fi
stat -c '%i' "$1"
python -c 'import os; import stat; print os.lstat("'$1'")[stat.ST_INO]'
}
@ -2130,7 +2130,7 @@ case $1 in
rm -f log log* # RSyslog debug output
rm -f work
rm -rf test-logdir stat-file1
rm -f rsyslog.empty imfile-state* omkafka-failed.data
rm -f rsyslog.empty imfile-state:* omkafka-failed.data
rm -rf rsyslog-link.*.log targets
rm -f tmp.qi nocert
rm -f core.* vgcore.* core*

View File

@ -0,0 +1,85 @@
#!/bin/bash
# This is part of the rsyslog testbench, licensed under ASL 2.0
. $srcdir/diag.sh check-inotify-only
. ${srcdir:=.}/diag.sh init
check_command_available logrotate
export IMFILEROTATES="10"
export TESTMESSAGES=10000
export TESTMESSAGESFULL=$((IMFILEROTATES * TESTMESSAGES-1))
generate_conf
add_conf '
global(
workDirectory="'$RSYSLOG_DYNNAME'.spool"
/* Filter out busy debug output */
debug.whitelist="off"
debug.files=["omfile.c", "queue.c", "rainerscript.c", "ratelimit.c", "ruleset.c", "main Q", "msg.c", "../action.c"]
)
module(load="../plugins/imfile/.libs/imfile" mode="inotify")
input(type="imfile" File="./'$RSYSLOG_DYNNAME'.input"
Tag="file:" Severity="error" Facility="local7" addMetadata="on")
$template outfmt,"%msg:F,58:2%\n"
if $msg contains "msgnum:" then
action(
type="omfile"
file=`echo $RSYSLOG_OUT_LOG`
template="outfmt"
)
'
# Write logrotate config file
echo '"./'$RSYSLOG_DYNNAME'.input"
{
create
daily
missingok
rotate 14
notifempty
compress
delaycompress
}' > $RSYSLOG_DYNNAME.logrotate
display_file() {
printf '\nFILE %s content:\n' $1
cat -n $1
}
startup
TESTMESSAGESSTART=0
for i in $(seq 1 $IMFILEROTATES);
do
#printf 'PRESS ENTER TO CONTINUE\n'
#read
printf '\n\nNEW RUN:\n'
./inputfilegen -m $TESTMESSAGES -i $TESTMESSAGESSTART > $RSYSLOG_DYNNAME.input
ls -li $RSYSLOG_DYNNAME.input*
echo ls ${RSYSLOG_DYNNAME}.spool:
ls -li ${RSYSLOG_DYNNAME}.spool
echo STATE FILE CONTENT:
shopt -s extglob
for filename in "$RSYSLOG_DYNNAME.spool"/imfile-state:*; do display_file $filename; done
# Wait until testmessages are processed by imfile!
msgcount=$((i * TESTMESSAGES-1))
# echo "TESTMESSAGESSTART: $TESTMESSAGESSTART - TotalMsgCount: $msgcount"
wait_file_lines $RSYSLOG_OUT_LOG $msgcount $RETRIES
# Logrotate on logfile
logrotate --state $RSYSLOG_DYNNAME.logrotate.state -f $RSYSLOG_DYNNAME.logrotate
TESTMESSAGESSTART=$((TESTMESSAGESSTART+TESTMESSAGES))
done
shutdown_when_empty
wait_shutdown
seq_check 0 $TESTMESSAGESFULL
exit_test

View File

@ -22,9 +22,9 @@ if $msg contains "msgnum:" then
'
# generate input file first. Note that rsyslog processes it as
# soon as it start up (so the file should exist at that point).
./inputfilegen 5 4000 > $RSYSLOG_DYNNAME.input
./inputfilegen -m5 -d4000 > $RSYSLOG_DYNNAME.input
startup
shutdown_when_empty # shut down rsyslogd when done processing messages
wait_shutdown # we need to wait until rsyslogd is finished!
shutdown_when_empty
wait_shutdown
seq_check 0 3
exit_test

View File

@ -0,0 +1,49 @@
#!/bin/bash
# added 2019-02-28 by rgerhards
# This is part of the rsyslog testbench, licensed under ASL 2.0
. ${srcdir:=.}/diag.sh init
generate_conf
add_conf '
global(workDirectory="'${RSYSLOG_DYNNAME}'.spool")
module(load="../plugins/imfile/.libs/imfile")
input(type="imfile" tag="file:" file="./'$RSYSLOG_DYNNAME'.input")
template(name="outfmt" type="string" string="%msg:F,58:2%\n")
if $msg contains "msgnum:" then
action(type="omfile" file=`echo $RSYSLOG_OUT_LOG` template="outfmt")
'
# generate a very small file so that imfile cannot generate file_id
./inputfilegen -m1 > $RSYSLOG_DYNNAME.input
startup
shutdown_when_empty
wait_shutdown
seq_check 0 0 # check we got the (one) message correctly
# and also check state file name is correct:
inode=$(get_inode "$RSYSLOG_DYNNAME.input")
if [ ! -f "$RSYSLOG_DYNNAME.spool/imfile-state:$inode" ]; then
printf 'FAIL: state file name incorrect,\nexpected \"%s\"\nspool dir is:\n' \
$RSYSLOG_DYNNAME.spool/imfile-state:$inode
ls -l $RSYSLOG_DYNNAME.spool
error_exit 1
fi
printf '\nSTAGE 1 OK - inode-only state file properly generated\n\n'
# now add data to the input file, up to a point where we have file_id.
# rsyslog must update state file name AND remove inode-only one.
./inputfilegen -m999 -i1 >> $RSYSLOG_DYNNAME.input
startup
wait_file_lines "$RSYSLOG_OUT_LOG" 1000
shutdown_when_empty
wait_shutdown
seq_check 0 999 # check we got the messages correctly
# and verify the state files are correct
if [ -f "$RSYSLOG_DYNNAME.spool/imfile-state:$inode" ]; then
printf 'FAIL: inode-only state file still exists, but must not.\nincorrect name: \"%s\"\nspool dir is:\n' \
$RSYSLOG_DYNNAME.spool/imfile-state:$inode
ls -l $RSYSLOG_DYNNAME.spool
error_exit 1
fi
ls -l $RSYSLOG_DYNNAME.spool
exit_test

View File

@ -0,0 +1,45 @@
#!/bin/bash
# added 2019-02-28 by rgerhards
# This is part of the rsyslog testbench, licensed under ASL 2.0
. ${srcdir:=.}/diag.sh init
generate_conf
add_conf '
global(workDirectory="'${RSYSLOG_DYNNAME}'.spool")
module(load="../plugins/imfile/.libs/imfile")
input(type="imfile" tag="file:" file="./'$RSYSLOG_DYNNAME'.input")
template(name="outfmt" type="string" string="%msg:F,58:2%\n")
if $msg contains "msgnum:" then
action(type="omfile" file=`echo $RSYSLOG_OUT_LOG` template="outfmt")
'
# generate a very small file so that imfile cannot generate file_id
./inputfilegen -m1 > $RSYSLOG_DYNNAME.input
startup
shutdown_when_empty
wait_shutdown
seq_check 0 0 # check we got the messages correctly
# and also check state file name is correct:
inode=$(get_inode "$RSYSLOG_DYNNAME.input")
printf '\nSTAGE 1 OK - inode-only state file properly generated\n\n'
# now add small amount of data, so that file_id can still not be
# generated. Check that state file remains valid.
# rsyslog must update state file name AND remove inode-only one.
./inputfilegen -m3 -i1 >> $RSYSLOG_DYNNAME.input
startup
wait_file_lines "$RSYSLOG_OUT_LOG" 4
shutdown_when_empty
wait_shutdown
seq_check 0 3 # check we got the messages correctly
# and verify the state files are correct
if [ ! -f "$RSYSLOG_DYNNAME.spool/imfile-state:$inode" ]; then
printf 'FAIL: STAGE 2 state file name incorrect,\nexpected \"%s\"\nspool dir is:\n' \
$RSYSLOG_DYNNAME.spool/imfile-state:$inode
ls -l $RSYSLOG_DYNNAME.spool
error_exit 1
fi
printf '\nSTAGE 2 OK - inode-only state file properly maintained\n\n'
exit_test