rsyslog/tools/pmrfc5424.c
Rainer Gerhards b326c76f45 style: normalize C source formatting via clang-format (PoC)
This commit applies the new canonical formatting style using `clang-format` with custom settings (notably 4-space indentation), as part of our shift toward automated formatting normalization.

⚠️ No functional changes are included — only whitespace and layout modifications as produced by `clang-format`.

This change is part of the formatting modernization strategy discussed in:
https://github.com/rsyslog/rsyslog/issues/5747

Key context:
- Formatting is now treated as a disposable view, normalized via tooling.
- The `.clang-format` file defines the canonical style.
- A fixup script (`devtools/format-code.sh`) handles remaining edge cases.
- Formatting commits are added to `.git-blame-ignore-revs` to reduce noise.
- Developers remain free to format code however they prefer locally.
2025-07-16 13:56:21 +02:00

320 lines
10 KiB
C

/* pmrfc5424.c
* This is a parser module for RFC5424-formatted messages.
*
* NOTE: read comments in module-template.h to understand how this file
* works!
*
* File begun on 2009-11-03 by RGerhards
*
* Copyright 2007-2015 Rainer Gerhards and Adiscon GmbH.
*
* This file is part of rsyslog.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* -or-
* see COPYING.ASL20 in the source distribution
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "config.h"
#include "rsyslog.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include "syslogd.h"
#include "conf.h"
#include "syslogd-types.h"
#include "template.h"
#include "msg.h"
#include "module-template.h"
#include "glbl.h"
#include "errmsg.h"
#include "parser.h"
#include "datetime.h"
#include "unicode-helper.h"
/* Module-level declarations. All of these are macros from the rsyslog module
 * framework (see module-template.h, as noted in the file header); their
 * expansion is not visible in this file.
 */
MODULE_TYPE_PARSER
MODULE_TYPE_NOKEEP;
/* the name of this parser */
PARSER_NAME("rsyslog.rfc5424")
/* internal structures
*/
DEF_PMOD_STATIC_DATA;
/* object interfaces used by this module; they are acquired via objUse() in
 * modInit and handed back via objRelease() in modExit
 */
DEFobjCurrIf(glbl) DEFobjCurrIf(parser) DEFobjCurrIf(datetime)
/* config data */
BEGINisCompatibleWithFeature
    CODESTARTisCompatibleWithFeature;
    /* This parser is compatible with automatic sanitization and with
     * automatic PRI parsing. For every other feature iRet is not touched
     * here, so it keeps whatever value the module-template macros gave it.
     */
    if (eFeat == sFEATUREAutomaticSanitazion || eFeat == sFEATUREAutomaticPRIParsing) {
        iRet = RS_RET_OK;
    }
ENDisCompatibleWithFeature
/* Helper for the RFC5424 parser: extract a single SP-terminated header field.
 *
 * Bytes are copied from the parse position into pResult until either a SP
 * is found or the *pLenStr bytes left in the message are used up. pResult is
 * always NUL-terminated; the caller must provide a buffer that is large
 * enough for the field plus the terminator. The terminating SP is consumed,
 * but not copied.
 *
 * pp2parse - in/out: parse pointer, advanced past everything consumed
 * pResult  - out: receives the field contents as a NUL-terminated string
 * pLenStr  - in/out: bytes left in the message, reduced by the bytes consumed
 *
 * Returns 0 if the field was terminated by SP and 1 if the input ended
 * before a SP was seen.
 */
static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr) {
    uchar *pSrc;
    uchar *pDst;
    int lenLeft;
    int rc;

    assert(pp2parse != NULL);
    assert(*pp2parse != NULL);
    assert(pResult != NULL);

    pSrc = *pp2parse;
    pDst = pResult;
    lenLeft = *pLenStr;

    /* copy the field contents */
    for (; lenLeft > 0 && *pSrc != ' '; --lenLeft) {
        *pDst++ = *pSrc++;
    }
    *pDst = '\0';

    /* The loop stops either on a SP or because the input is exhausted.
     * Only in the first case there is a separator to skip.
     */
    if (lenLeft > 0 && *pSrc == ' ') {
        ++pSrc;
        --lenLeft;
        rc = 0;
    } else {
        rc = 1; /* there MUST be an SP! */
    }

    /* hand the updated parse state back to the caller */
    *pp2parse = pSrc;
    *pLenStr = lenLeft;
    return rc;
}
/* Helper for the RFC5424 parser: extract the STRUCTURED-DATA field as a
 * whole. This function does NOT parse inside structured data, it only copies
 * the complete field into pResult. Parsing the single entities is left to
 * other functions.
 *
 * Accepted input is either the single character '-' (no structured data) or
 * a sequence that starts with '[' and runs up to the first ']' which is
 * followed by a SP. SPs inside the field are copied. The sequence "\]" is
 * an escaped bracket and does NOT terminate the field.
 *
 * pp2parse - in/out: parse pointer, advanced past the field and the SP that
 *            follows it
 * pResult  - out: receives the field as a NUL-terminated string; the caller
 *            must ensure the buffer is large enough to hold the value
 * pLenStr  - in/out: bytes left in the message, reduced by the bytes consumed
 *
 * Returns 0 if everything is fine and 1 otherwise, that is if
 *  - the input does not start with '[' or '-' (or is empty). In this case
 *    nothing is consumed and pResult is set to the empty string, so that a
 *    caller which ignores the return code does not pick up stale buffer
 *    content.
 *  - the input ends before the closing ']' was found
 *  - the field is not followed by a SP. Note that this includes a message
 *    that ends right after the structured data.
 */
static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr) {
    uchar *p2parse;
    int bCont = 1;
    int iRet = 0;
    int lenStr;

    assert(pp2parse != NULL);
    assert(*pp2parse != NULL);
    assert(pResult != NULL);
    assert(pLenStr != NULL);

    p2parse = *pp2parse;
    lenStr = *pLenStr;

    /* this is the actual parsing loop
     * Remember: structured data starts with [ and includes any characters
     * until the first ] followed by a SP. There may be spaces inside
     * structured data. There may also be \] inside the structured data, which
     * do NOT terminate an element.
     */
    if (lenStr <= 0 || (*p2parse != '[' && *p2parse != '-')) {
        /* this is NOT structured data! Leave a defined (empty) result behind
         * instead of whatever the buffer held before.
         */
        *pResult = '\0';
        return 1;
    }

    if (*p2parse == '-') { /* empty structured data? */
        *pResult++ = '-';
        ++p2parse;
        --lenStr;
    } else {
        while (bCont) {
            if (lenStr < 2) {
                /* fewer than two bytes left, so no look-ahead is possible: the
                 * only valid case is that the message ends with the closing ]
                 */
                if (lenStr > 0 && *p2parse == ']') {
                    *pResult++ = *p2parse;
                    p2parse++;
                    lenStr--;
                    bCont = 0;
                } else {
                    iRet = 1; /* this is not valid! */
                    bCont = 0;
                }
            } else if (*p2parse == '\\' && *(p2parse + 1) == ']') {
                /* this is escaped, need to copy both */
                *pResult++ = *p2parse++;
                *pResult++ = *p2parse++;
                lenStr -= 2;
            } else if (*p2parse == ']' && *(p2parse + 1) == ' ') {
                /* found end: copy the ] only, the SP is handled below */
                *pResult++ = *p2parse;
                p2parse += 1;
                lenStr -= 1;
                bCont = 0;
            } else {
                *pResult++ = *p2parse++;
                --lenStr;
            }
        }
    }

    if (lenStr > 0 && *p2parse == ' ') {
        ++p2parse; /* eat SP, but only if not at end of string */
        --lenStr;
    } else {
        iRet = 1; /* there MUST be an SP! */
    }
    *pResult = '\0';

    /* set the new parse pointer */
    *pp2parse = p2parse;
    *pLenStr = lenStr;
    return iRet;
}
/* Parse an RFC5424-formatted syslog message.
 *
 * Supported format:
 *
 * <PRI>VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG
 *
 * Parsing starts at pMsg->offAfterPRI, so <PRI> is not looked at here.
 * VERSION is checked by this function: if the text after PRI does not start
 * with "1" SP, the message is not for this parser and processing finishes
 * with RS_RET_COULD_NOT_PARSE.
 *
 * If no TIMESTAMP can be detected, the remaining header fields are not
 * extracted and the MSG offset is set to the current parse position. The
 * return codes of the field helpers are not evaluated; whatever they
 * extracted is stored in the message object.
 */
BEGINparse
    uchar *pCur; /* current parse position inside the raw message */
    uchar *pWork = NULL; /* work buffer that receives one field at a time */
    int lenLeft; /* number of bytes not yet consumed */
    int bHdrOk = 1; /* cleared if the TIMESTAMP could not be parsed */
    CODESTARTparse;
    assert(pMsg != NULL);
    assert(pMsg->pszRawMsg != NULL);

    /* start right after PRI */
    pCur = pMsg->pszRawMsg + pMsg->offAfterPRI;
    lenLeft = pMsg->iLenRawMsg - pMsg->offAfterPRI;

    /* we are the right parser only if VERSION "1" followed by SP is present */
    if (!(lenLeft >= 2 && pCur[0] == '1' && pCur[1] == ' ')) {
        ABORT_FINALIZE(RS_RET_COULD_NOT_PARSE);
    }
    DBGPRINTF("Message has RFC5424/syslog-protocol format.\n");
    setProtocolVersion(pMsg, MSG_RFC5424_PROTOCOL);
    pCur += 2;
    lenLeft -= 2;

    /* A single work buffer is used for all fields. It can hold all of the
     * remaining message plus the terminating NUL, so no field can be too
     * large for it.
     */
    CHKmalloc(pWork = malloc(lenLeft + 1));

    /* TIMESTAMP */
    if (lenLeft >= 2 && pCur[0] == '-' && pCur[1] == ' ') {
        /* "-" means no timestamp was given: use the reception time instead */
        memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime));
        pCur += 2;
        lenLeft -= 2;
    } else if (datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &pCur, &lenLeft) != RS_RET_OK) {
        DBGPRINTF("no TIMESTAMP detected!\n");
        bHdrOk = 0;
    } else if (pMsg->msgFlags & IGNDATE) {
        /* the date inside the message is to be ignored: overwrite what was
         * just parsed with the reception time
         */
        memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime));
    }

    if (bHdrOk) {
        /* HOSTNAME */
        parseRFCField(&pCur, pWork, &lenLeft);
        MsgSetHOSTNAME(pMsg, pWork, ustrlen(pWork));

        /* APP-NAME */
        parseRFCField(&pCur, pWork, &lenLeft);
        MsgSetAPPNAME(pMsg, (char *)pWork);

        /* PROCID */
        parseRFCField(&pCur, pWork, &lenLeft);
        MsgSetPROCID(pMsg, (char *)pWork);

        /* MSGID */
        parseRFCField(&pCur, pWork, &lenLeft);
        MsgSetMSGID(pMsg, (char *)pWork);

        /* STRUCTURED-DATA */
        parseRFCStructuredData(&pCur, pWork, &lenLeft);
        MsgSetStructuredData(pMsg, (char *)pWork);
    }

    /* MSG: everything from the current parse position on */
    MsgSetMSGoffs(pMsg, pCur - pMsg->pszRawMsg);

finalize_it:
    free(pWork); /* still NULL if we bailed out before the allocation */
ENDparse
/* Module exit handler: hands back the glbl, parser and datetime object
 * interfaces that modInit acquired via objUse().
 */
BEGINmodExit
CODESTARTmodExit;
/* release what we no longer need */
objRelease(glbl, CORE_COMPONENT);
objRelease(parser, CORE_COMPONENT);
objRelease(datetime, CORE_COMPONENT);
ENDmodExit
/* Entry point query handler. The body consists only of macros from the
 * module framework, whose expansion is not visible in this file. Judging by
 * their names they cover the standard parser module entry points plus
 * isCompatibleWithFeature.
 * NOTE(review): the second macro carries an OMOD (not PMOD) suffix -
 * presumably it is shared between module types; confirm against
 * module-template.h.
 */
BEGINqueryEtryPt
CODESTARTqueryEtryPt;
CODEqueryEtryPt_STD_PMOD_QUERIES;
CODEqueryEtryPt_IsCompatibleWithFeature_IF_OMOD_QUERIES;
ENDqueryEtryPt
/* Module initialization: announces the module interface version supported by
 * this module and acquires the glbl, parser and datetime object interfaces
 * (they are released again in modExit).
 */
BEGINmodInit(pmrfc5424)
CODESTARTmodInit;
*ipIFVersProvided = CURR_MOD_IF_VERSION; /* we only support the current interface specification */
/* NOTE: the next line holds two separate items - the
 * CODEmodInit_QueryRegCFSLineHdlr macro and the first objUse() call.
 */
CODEmodInit_QueryRegCFSLineHdlr CHKiRet(objUse(glbl, CORE_COMPONENT));
CHKiRet(objUse(parser, CORE_COMPONENT));
CHKiRet(objUse(datetime, CORE_COMPONENT));
/* debug output only */
dbgprintf("rfc5424 parser init called\n");
/* NOTE(review): %p formally expects a void *. GetParserName is defined
 * outside of the visible code and looks like a function, which is passed
 * uncast here - confirm this is fine on all supported platforms.
 */
dbgprintf("GetParserName addr %p\n", GetParserName);
ENDmodInit
/* vim:set ai:
*/