enhanced property replacer's regex to support submatches

- enabled Posix ERE expressions inside the property replacer
  (previously BRE was permitted only)
- provided ability to specify that a regular expression submatch shall
  be used inside the property replacer
This commit is contained in:
Rainer Gerhards 2008-05-29 12:48:15 +02:00
parent 1644e9fabc
commit 99f18190a1
5 changed files with 80 additions and 14 deletions

View File

@ -1,5 +1,9 @@
---------------------------------------------------------------------------
Version 3.19.5 (rgerhards), 2008-05-??
- enabled Posix ERE expressions inside the property replacer
(previously BRE was permitted only)
- provided ability to specify that a regular expression submatch shall
be used inside the property replacer
---------------------------------------------------------------------------
Version 3.19.4 (rgerhards), 2008-05-27
- implemented x509/certvalid gtls auth mode

View File

@ -204,8 +204,19 @@ not become part of it. If you are using regular expressions, the
property replacer will return the part of the property text that
matches the regular expression. An example for a property replacer
sequence with a regular expression is: "%msg:R:.*Sev:. \(.*\)
\[.*--end%"<br>
</p>
\[.*--end%"</p>
<p>It is possible to specify some parameters after the "R". These are
comma-separated. They are:
<p>R,&lt;regexp-type&gt;,&lt;submatch&gt;
<p>regexp-type is either "BRE" for Posix basic regular expressions or
"ERE" for extended ones. The string must be given in upper case. The
default is "BRE" to be consistent with earlier versions of rsyslog that
did not support ERE. The submatch identifies the submatch to be used
with the result. A single digit is supported. Match 0 is the full match,
while 1 to 9 are the actual submatches.
<p>The following is a sample of an ERE expression that takes the first
submatch from the message string:
<p>%msg:R,ERE,1:for (vlan[0-9]*):--end%
<p><b>Also, extraction can be done based on so-called
"fields"</b>. To do so, place a "F" into FromChar. A field in its
current definition is anything that is delimited by a delimiter

View File

@ -1605,8 +1605,8 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe,
#ifdef FEATURE_REGEXP
/* Variables necessary for regular expression matching */
size_t nmatch = 1;
regmatch_t pmatch[1];
size_t nmatch = 10;
regmatch_t pmatch[10];
#endif
assert(pMsg != NULL);
@ -1839,7 +1839,7 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe,
/* Could not compile regex before! */
return "**NO MATCH** **BAD REGULAR EXPRESSION**";
dbgprintf("debug: String to match for regex is: %s\n", pRes);
dbgprintf("string to match for regex is: %s\n", pRes);
if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) {
if (0 != regexp.regexec(&pTpe->data.field.re, pRes, nmatch, pmatch, 0)) {
@ -1850,12 +1850,26 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe,
}
return "**NO MATCH**";
} else {
/* Match! */
/* I need to malloc pB */
{int i; for(i = 0 ; i < 10 ; ++i) {
dbgprintf("rqtd regex match (nmatch %d) # %d, idx %d: so %d, eo %d\n", nmatch, pTpe->data.field.iMatchToUse, i,
pmatch[i].rm_so,
pmatch[i].rm_eo);
}}
/* Match- but did it match the one we wanted? */
/* we got no match! */
if(pmatch[pTpe->data.field.iMatchToUse].rm_so == -1) {
if (*pbMustBeFreed == 1) {
free(pRes);
*pbMustBeFreed = 0;
}
return "**NO MATCH**";
}
/* OK, we have a usable match - we now need to malloc pB */
int iLenBuf;
char *pB;
iLenBuf = pmatch[0].rm_eo - pmatch[0].rm_so;
iLenBuf = pmatch[pTpe->data.field.iMatchToUse].rm_eo
- pmatch[pTpe->data.field.iMatchToUse].rm_so;
pB = (char *) malloc((iLenBuf + 1) * sizeof(char));
if (pB == NULL) {
@ -1866,7 +1880,7 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe,
}
/* Lets copy the matched substring to the buffer */
memcpy(pB, pRes + pmatch[0].rm_so, iLenBuf);
memcpy(pB, pRes + pmatch[pTpe->data.field.iMatchToUse].rm_so, iLenBuf);
pB[iLenBuf] = '\0';/* terminate string, did not happen before */
if (*pbMustBeFreed == 1)

View File

@ -518,6 +518,34 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl)
/* APR: R found! regex alarm ! :) */
++p; /* eat ':' */
/* first come the regex type */
if(*p == ',') {
++p; /* eat ',' */
if(*p == 'B' && *(p+1) == 'R' && *(p+2) == 'E' && *(p+3) == ',') {
pTpe->data.field.typeRegex = TPL_REGEX_BRE;
p += 3; /* eat indicator sequence */
} else if(*p == 'E' && *(p+1) == 'R' && *(p+2) == 'E' && *(p+3) == ',') {
pTpe->data.field.typeRegex = TPL_REGEX_ERE;
p += 3; /* eat indicator sequence */
} else {
errmsg.LogError(NO_ERRCODE, "error: invalid regular expression type, rest of line %s",
(char*) p);
}
}
/* now check for submatch ID */
pTpe->data.field.iMatchToUse = 0;
if(*p == ',') {
/* in this case a number follows, which indicates which match
* shall be used. This must be a single digit.
*/
++p; /* eat ',' */
if(isdigit((int) *p)) {
pTpe->data.field.iMatchToUse = *p - '0';
++p; /* eat digit */
}
}
if(*p != ':') {
/* There is something more than an R , this is invalid ! */
/* Complain on extra characters */
@ -525,6 +553,8 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl)
(char*) *pp);
} else {
pTpe->data.field.has_regex = 1;
dbgprintf("we have a regexp and use match #%d\n",
pTpe->data.field.iMatchToUse);
}
} else {
/* now we fall through the "regular" FromPos code */
@ -620,8 +650,9 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl)
/* Now i compile the regex */
/* Remember that the re is an attribute of the Template entry */
if((iRetLocal = objUse(regexp, LM_REGEXP_FILENAME)) == RS_RET_OK) {
dbgprintf("compile data.field.re ptr: %p (pTpe %p)\n", (&(pTpe->data.field.re)), pTpe);
if(regexp.regcomp(&(pTpe->data.field.re), (char*) regex_char, 0) != 0) {
int iOptions;
iOptions = (pTpe->data.field.typeRegex == TPL_REGEX_ERE) ? REG_EXTENDED : 0;
if(regexp.regcomp(&(pTpe->data.field.re), (char*) regex_char, iOptions) != 0) {
dbgprintf("error: can not compile regex: '%s'\n", regex_char);
pTpe->data.field.has_regex = 2;
}

View File

@ -67,7 +67,13 @@ struct templateEntry {
unsigned iToPos; /* up to that one... */
#ifdef FEATURE_REGEXP
regex_t re; /* APR: this is the regular expression */
unsigned has_regex;
short has_regex;
short iMatchToUse;/* which match should be obtained (10 max) */
enum {
TPL_REGEX_BRE = 0, /* posix BRE */
TPL_REGEX_ERE = 1 /* posix ERE */
} typeRegex;
#endif
unsigned has_fields; /* support for field-counting: field to extract */
unsigned char field_delim; /* support for field-counting: field delemiter char */