template procesing/json: performance optimization

This patch improves the performance of important parts of
template JSON processing.
This commit is contained in:
Rainer Gerhards 2022-05-17 10:11:10 +02:00
parent d9870419a9
commit 6cf38c0ce7
No known key found for this signature in database
GPG Key ID: 0CB6B2A8BE80B499

View File

@ -7,7 +7,7 @@
* of the "old" message code without any modifications. However, it
* helps to have things at the right place one we go to the meat of it.
*
* Copyright 2007-2020 Rainer Gerhards and Adiscon GmbH.
* Copyright 2007-2022 Rainer Gerhards and Adiscon GmbH.
*
* This file is part of the rsyslog runtime library.
*
@ -3290,13 +3290,17 @@ finalize_it:
}
/* Encode a JSON value and add it to provided string. Note that
* the string object may be NULL. In this case, it is created
* if and only if escaping is needed. if escapeAll is false, previously
* escaped strings are left as is
/* Helper for jsonAddVal(), to be called onces we know there are actually
* json escapes inside the string. If so, this function takes over.
* Splitting the functions permits us to make some performance optimizations.
* For further details, see jsonAddVal().
*/
static rsRetVal
jsonAddVal(uchar *pSrc, unsigned buflen, es_str_t **dst, int escapeAll)
static rsRetVal ATTR_NONNULL(1, 4)
jsonAddVal_escaped(uchar *const pSrc,
const unsigned buflen,
const unsigned len_none_escaped_head,
es_str_t **dst,
const int escapeAll)
{
unsigned char c;
es_size_t i;
@ -3304,39 +3308,69 @@ jsonAddVal(uchar *pSrc, unsigned buflen, es_str_t **dst, int escapeAll)
unsigned ni;
unsigned char nc;
int j;
uchar wrkbuf[100000];
size_t dst_realloc_size;
size_t dst_size;
uchar *dst_base;
uchar *dst_w;
uchar *newbuf;
DEFiRet;
for(i = 0 ; i < buflen ; ++i) {
assert(len_none_escaped_head <= buflen);
/* first copy over unescaped head string */
if(len_none_escaped_head+10 > sizeof(wrkbuf)) {
dst_size = 2 * len_none_escaped_head;
CHKmalloc(dst_base = malloc(dst_size));
} else {
dst_size = sizeof(wrkbuf);
dst_base = wrkbuf;
}
dst_realloc_size = dst_size - 10; /* some buffer for escaping */
dst_w = dst_base;
memcpy(dst_w, pSrc, len_none_escaped_head);
dst_w += len_none_escaped_head;
/* now do the escaping */
for(i = len_none_escaped_head ; i < buflen ; ++i) {
const size_t dst_offset = dst_w - dst_base;
if(dst_offset >= dst_realloc_size) {
const size_t new_size = 2 * dst_size;
if(dst_base == wrkbuf) {
CHKmalloc(newbuf = malloc(new_size));
memcpy(newbuf, dst_base, dst_offset);
} else {
CHKmalloc(newbuf = realloc(dst_base, new_size));
}
dst_size = new_size;
dst_realloc_size = new_size - 10; /* some buffer for escaping */
dst_base = newbuf;
dst_w = dst_base + dst_offset;
}
c = pSrc[i];
if( (c >= 0x23 && c <= 0x2e)
|| (c >= 0x30 && c <= 0x5b)
if( (c >= 0x30 && c <= 0x5b)
|| (c >= 0x23 && c <= 0x2e)
|| (c >= 0x5d /* && c <= 0x10FFFF*/)
|| c == 0x20 || c == 0x21) {
/* no need to escape */
if(*dst != NULL)
es_addChar(dst, c);
*dst_w++ = c;
} else {
if(*dst == NULL) {
if(i == 0) {
/* we hope we have only few escapes... */
*dst = es_newStr(buflen+10);
} else {
*dst = es_newStrFromBuf((char*)pSrc, i);
}
if(*dst == NULL) {
ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY);
}
}
/* we must escape, try RFC4627-defined special sequences first */
switch(c) {
case '\0':
es_addBuf(dst, "\\u0000", 6);
*dst_w++ = '\\';
*dst_w++ = 'u';
*dst_w++ = '0';
*dst_w++ = '0';
*dst_w++ = '0';
*dst_w++ = '0';
break;
case '\"':
es_addBuf(dst, "\\\"", 2);
*dst_w++ = '\\';
*dst_w++ = '"';
break;
case '/':
es_addBuf(dst, "\\/", 2);
*dst_w++ = '\\';
*dst_w++ = '/';
break;
case '\\':
if (escapeAll == RSFALSE) {
@ -3347,31 +3381,35 @@ jsonAddVal(uchar *pSrc, unsigned buflen, es_str_t **dst, int escapeAll)
/* Attempt to not double encode */
if ( nc == '"' || nc == '/' || nc == '\\' || nc == 'b' || nc == 'f'
|| nc == 'n' || nc == 'r' || nc == 't' || nc == 'u') {
es_addChar(dst, c);
es_addChar(dst, nc);
*dst_w++ = c;
*dst_w++ = nc;
i = ni;
break;
}
}
}
es_addBuf(dst, "\\\\", 2);
*dst_w++ = '\\';
*dst_w++ = '\\';
break;
case '\010':
es_addBuf(dst, "\\b", 2);
*dst_w++ = '\\';
*dst_w++ = 'b';
break;
case '\014':
es_addBuf(dst, "\\f", 2);
*dst_w++ = '\\';
*dst_w++ = 'f';
break;
case '\n':
es_addBuf(dst, "\\n", 2);
*dst_w++ = '\\';
*dst_w++ = 'n';
break;
case '\r':
es_addBuf(dst, "\\r", 2);
*dst_w++ = '\\';
*dst_w++ = 'r';
break;
case '\t':
es_addBuf(dst, "\\t", 2);
*dst_w++ = '\\';
*dst_w++ = 't';
break;
default:
/* TODO : proper Unicode encoding (see header comment) */
@ -3379,12 +3417,54 @@ jsonAddVal(uchar *pSrc, unsigned buflen, es_str_t **dst, int escapeAll)
numbuf[3-j] = hexdigit[c % 16];
c = c / 16;
}
es_addBuf(dst, "\\u", 2);
es_addBuf(dst, numbuf, 4);
*dst_w++ = '\\';
*dst_w++ = 'u';
*dst_w++ = numbuf[0];
*dst_w++ = numbuf[1];
*dst_w++ = numbuf[2];
*dst_w++ = numbuf[3];
break;
}
}
}
if(*dst == NULL) {
*dst = es_newStrFromBuf((char *) dst_base, dst_w - dst_base);
} else {
es_addBuf(dst, (const char *) dst_base, dst_w - dst_base);
}
finalize_it:
if(dst_base != wrkbuf) {
free(dst_base);
}
RETiRet;
}
/* Encode a JSON value and add it to provided string. Note that
* the string object may be NULL. In this case, it is created
* if and only if escaping is needed. if escapeAll is false, previously
* escaped strings are left as is
*/
static rsRetVal ATTR_NONNULL(1, 3)
jsonAddVal(uchar *const pSrc, const unsigned buflen, es_str_t **dst, const int escapeAll)
{
es_size_t i;
DEFiRet;
for(i = 0 ; i < buflen ; ++i) {
const uchar c = pSrc[i];
if(! ( (c >= 0x30 && c <= 0x5b)
|| (c >= 0x23 && c <= 0x2e)
|| (c >= 0x5d /* && c <= 0x10FFFF*/)
|| c == 0x20 || c == 0x21)
) {
iRet = jsonAddVal_escaped(pSrc, buflen, i, dst, escapeAll);
FINALIZE;
}
}
if(*dst != NULL) {
es_addBuf(dst, (const char *) pSrc, buflen);
}
finalize_it:
RETiRet;
}