mmsnareparse: preserve regex end-anchor semantics

Why: Regex trailing extra-data detection must not let a bounded search
window change the meaning of end-anchored patterns.

Impact: Prevents false trailing-token removal for regex patterns that
match only a truncated prefix.

Before/After: Before, '$' could match an artificial NUL at the search
limit; after, it only matches a real token end.

Technical Overview: When the regex input is temporarily NUL-terminated
at the configured search limit, pass REG_NOTEOL to regexec(). This keeps
the bounded input optimization and the start-anchored behavior, but
prevents '$' from matching the artificial boundary. Add a regression test
with an end-anchored numeric pattern and a longer alphanumeric final
token to prove the token remains parsed data instead of extradata.

With the help of AI-Agents: GPT-5.3-Codex
This commit is contained in:
Rainer Gerhards 2026-05-29 10:19:15 +02:00
parent 2817c88815
commit bd71ea0083
3 changed files with 49 additions and 2 deletions

View File

@ -5187,7 +5187,8 @@ static char *detect_and_truncate_trailing_extradata(instanceData *pData, char *m
searchStart[pData->searchLimit] = '\0';
}
const int isMatch = !regexec(&pData->ignoreTrailingPattern_preg, searchStart, 0, NULL, 0);
const int regexecFlags = tokenWasTruncated ? REG_NOTEOL : 0;
const int isMatch = !regexec(&pData->ignoreTrailingPattern_preg, searchStart, 0, NULL, regexecFlags);
if (tokenWasTruncated) {
searchStart[pData->searchLimit] = savedChar;
}

View File

@ -498,7 +498,8 @@ TESTS_MMSNAREPARSE_MINIMAL = \
mmsnareparse-custom.sh \
mmsnareparse-realworld-4624-4634-5140.sh \
mmsnareparse-trailing-extradata.sh \
mmsnareparse-trailing-extradata-regex.sh
mmsnareparse-trailing-extradata-regex.sh \
mmsnareparse-trailing-extradata-regex-anchor.sh
TESTS_MMSNAREPARSE_VALGRIND = \
mmsnareparse-comprehensive-vg.sh

View File

@ -0,0 +1,45 @@
#!/bin/bash
# Regression test: mmsnareparse regex trailing extra-data detection must not
# let the search-window boundary act as the end of the trailing token.
#
# Setup: the trailing pattern is the end-anchored regex "^[0-9]+$" with
# ignoreTrailingPattern.searchWindow="3", and the injected message ends with
# the token "123abc" (a numeric prefix followed by letters).
#
# Oracle: an end-anchored pattern must not match only the bounded prefix of a
# longer non-matching token, so the final token remains parsed data (it shows
# up as part of the User value) and no extradata_section is emitted for it.
# NOTE(review): presumably unset so that diag.sh init assigns a fresh dynamic
# name for this test run - confirm against diag.sh.
unset RSYSLOG_DYNNAME
# Quoted so a source directory containing whitespace does not get word-split.
. "${srcdir:=.}/diag.sh" init
generate_conf
# The output template writes EventID, Channel, User and extradata_section,
# separated by commas. $RSYSLOG_OUT_LOG is spliced in double-quoted so that the
# whole configuration always reaches add_conf as a single argument.
add_conf '
module(load="../plugins/mmsnareparse/.libs/mmsnareparse")
template(name="outfmt" type="list") {
property(name="$!win!Event!EventID")
constant(value=",")
property(name="$!win!Event!Channel")
constant(value=",")
property(name="$!win!EventData!User")
constant(value=",")
property(name="$!extradata_section")
constant(value="\n")
}
action(type="mmsnareparse"
definition.file="../plugins/mmsnareparse/sysmon_definitions.json"
ignoreTrailingPattern.regex="^[0-9]+$"
ignoreTrailingPattern.searchWindow="3")
action(type="omfile" file="'"$RSYSLOG_OUT_LOG"'" template="outfmt")
'
startup
# Quoted heredoc delimiter: the message is written verbatim, so the backslashes
# and braces in it are not interpreted by the shell.
cat <<'MSG' > "${RSYSLOG_DYNNAME}.input"
<14>Mar 22 08:47:23 testhost MSWinEventLog 1 Microsoft-Windows-Sysmon/Operational 20977 Mon Mar 22 08:47:23 2025 13 Windows SYSTEM User SetValue testhost Registry value set (rule: RegistryEvent) Registry value set: RuleName: Default RegistryEvent EventType: SetValue UtcTime: 2025-03-22 08:47:23.284 ProcessGuid: {fd4d0da6-d589-6916-eb03-000000000000} ProcessId: 4 Image: System TargetObject: HKLM\System\CurrentControlSet\Services\TestService\ImagePath Details: "C:\Program Files\TestAgent\TestService.exe" User: NT AUTHORITY\SYSTEM 123abc
MSG
injectmsg_file "${RSYSLOG_DYNNAME}.input"
shutdown_when_empty
wait_shutdown
# Expected output: the User value still carries the trailing "123abc" token,
# followed by the comma that precedes the extradata_section property.
content_check '13,Microsoft-Windows-Sysmon/Operational,NT AUTHORITY\SYSTEM 123abc,' "$RSYSLOG_OUT_LOG"
exit_test