template procesing/json: performance optimization

This patch improves the performance of important parts of template JSON processing.
2025-12-19 11:30:43 +01:00 · 2022-05-17 10:11:10 +02:00 · 2022-05-17 10:11:10 +02:00 · 6cf38c0ce7
commit 6cf38c0ce7
parent d9870419a9
1 changed files with 118 additions and 38 deletions
--- a/runtime/msg.c
+++ b/runtime/msg.c
@ -7,7 +7,7 @@
 * of the "old" message code without any modifications. However, it
 * helps to have things at the right place one we go to the meat of it.
 *
- * Copyright 2007-2020 Rainer Gerhards and Adiscon GmbH.
+ * Copyright 2007-2022 Rainer Gerhards and Adiscon GmbH.
 *
 * This file is part of the rsyslog runtime library.
 *
@ -3290,13 +3290,17 @@ finalize_it:
 }


-/* Encode a JSON value and add it to provided string. Note that
- * the string object may be NULL. In this case, it is created
- * if and only if escaping is needed. if escapeAll is false, previously
- * escaped strings are left as is
+/* Helper for jsonAddVal(), to be called onces we know there are actually
+ * json escapes inside the string. If so, this function takes over.
+ * Splitting the functions permits us to make some performance optimizations.
+ * For further details, see jsonAddVal().
 */
-static rsRetVal
-jsonAddVal(uchar *pSrc, unsigned buflen, es_str_t **dst, int escapeAll)
+static rsRetVal ATTR_NONNULL(1, 4)
+jsonAddVal_escaped(uchar *const pSrc,
+	const unsigned buflen,
+	const unsigned len_none_escaped_head,
+	es_str_t **dst,
+	const int escapeAll)
 {
 	unsigned char c;
 	es_size_t i;
@ -3304,39 +3308,69 @@ jsonAddVal(uchar *pSrc, unsigned buflen, es_str_t **dst, int escapeAll)
 	unsigned ni;
 	unsigned char nc;
 	int j;
+	uchar wrkbuf[100000];
+	size_t dst_realloc_size;
+	size_t dst_size;
+	uchar *dst_base;
+	uchar *dst_w;
+	uchar *newbuf;
 	DEFiRet;

-	for(i = 0 ; i < buflen ; ++i) {
+	assert(len_none_escaped_head <= buflen);
+	/* first copy over unescaped head string */
+	if(len_none_escaped_head+10 > sizeof(wrkbuf)) {
+		dst_size = 2 * len_none_escaped_head;
+		CHKmalloc(dst_base = malloc(dst_size));
+	} else {
+		dst_size = sizeof(wrkbuf);
+		dst_base = wrkbuf;
+	}
+	dst_realloc_size = dst_size - 10; /* some buffer for escaping */
+	dst_w = dst_base;
+	memcpy(dst_w, pSrc, len_none_escaped_head);
+	dst_w += len_none_escaped_head;
+
+	/* now do the escaping */
+	for(i = len_none_escaped_head ; i < buflen ; ++i) {
+		const size_t dst_offset = dst_w - dst_base;
+		if(dst_offset >= dst_realloc_size) {
+			const size_t new_size = 2 * dst_size;
+			if(dst_base == wrkbuf) {
+				CHKmalloc(newbuf = malloc(new_size));
+				memcpy(newbuf, dst_base, dst_offset);
+			} else {
+				CHKmalloc(newbuf = realloc(dst_base, new_size));
+			}
+			dst_size = new_size;
+			dst_realloc_size = new_size - 10; /* some buffer for escaping */
+			dst_base = newbuf;
+			dst_w = dst_base + dst_offset;
+		}
 		c = pSrc[i];
-		if(   (c >= 0x23 && c <= 0x2e)
-		   || (c >= 0x30 && c <= 0x5b)
+		if(   (c >= 0x30 && c <= 0x5b)
+		   || (c >= 0x23 && c <= 0x2e)
 		   || (c >= 0x5d /* && c <= 0x10FFFF*/)
 		   || c == 0x20 || c == 0x21) {
 			/* no need to escape */
-			if(*dst != NULL)
-				es_addChar(dst, c);
+			*dst_w++ = c;
 		} else {
-			if(*dst == NULL) {
-				if(i == 0) {
-					/* we hope we have only few escapes... */
-					*dst = es_newStr(buflen+10);
-				} else {
-					*dst = es_newStrFromBuf((char*)pSrc, i);
-				}
-				if(*dst == NULL) {
-					ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY);
-				}
-			}
 			/* we must escape, try RFC4627-defined special sequences first */
 			switch(c) {
 			case '\0':
-				es_addBuf(dst, "\\u0000", 6);
+				*dst_w++ = '\\';
+				*dst_w++ = 'u';
+				*dst_w++ = '0';
+				*dst_w++ = '0';
+				*dst_w++ = '0';
+				*dst_w++ = '0';
 				break;
 			case '\"':
-				es_addBuf(dst, "\\\"", 2);
+				*dst_w++ = '\\';
+				*dst_w++ = '"';
 				break;
 			case '/':
-				es_addBuf(dst, "\\/", 2);
+				*dst_w++ = '\\';
+				*dst_w++ = '/';
 				break;
 			case '\\':
 				if (escapeAll == RSFALSE) {
@ -3347,31 +3381,35 @@ jsonAddVal(uchar *pSrc, unsigned buflen, es_str_t **dst, int escapeAll)
 						/* Attempt to not double encode */
 						if (   nc == '"' || nc == '/' || nc == '\\' || nc == 'b' || nc == 'f'
 							|| nc == 'n' || nc == 'r' || nc == 't' || nc == 'u') {
-
-							es_addChar(dst, c);
-							es_addChar(dst, nc);
+							*dst_w++ = c;
+							*dst_w++ = nc;
 							i = ni;
 							break;
 						}
 					}
 				}
-
-				es_addBuf(dst, "\\\\", 2);
+				*dst_w++ = '\\';
+				*dst_w++ = '\\';
 				break;
 			case '\010':
-				es_addBuf(dst, "\\b", 2);
+				*dst_w++ = '\\';
+				*dst_w++ = 'b';
 				break;
 			case '\014':
-				es_addBuf(dst, "\\f", 2);
+				*dst_w++ = '\\';
+				*dst_w++ = 'f';
 				break;
 			case '\n':
-				es_addBuf(dst, "\\n", 2);
+				*dst_w++ = '\\';
+				*dst_w++ = 'n';
 				break;
 			case '\r':
-				es_addBuf(dst, "\\r", 2);
+				*dst_w++ = '\\';
+				*dst_w++ = 'r';
 				break;
 			case '\t':
-				es_addBuf(dst, "\\t", 2);
+				*dst_w++ = '\\';
+				*dst_w++ = 't';
 				break;
 			default:
 				/* TODO : proper Unicode encoding (see header comment) */
@ -3379,12 +3417,54 @@ jsonAddVal(uchar *pSrc, unsigned buflen, es_str_t **dst, int escapeAll)
 					numbuf[3-j] = hexdigit[c % 16];
 					c = c / 16;
 				}
-				es_addBuf(dst, "\\u", 2);
-				es_addBuf(dst, numbuf, 4);
+				*dst_w++ = '\\';
+				*dst_w++ = 'u';
+				*dst_w++ = numbuf[0];
+				*dst_w++ = numbuf[1];
+				*dst_w++ = numbuf[2];
+				*dst_w++ = numbuf[3];
 				break;
 			}
 		}
 	}
+	if(*dst == NULL) {
+		*dst = es_newStrFromBuf((char *) dst_base, dst_w - dst_base);
+	} else {
+		es_addBuf(dst, (const char *) dst_base, dst_w - dst_base);
+	}
+finalize_it:
+	if(dst_base != wrkbuf) {
+		free(dst_base);
+	}
+	RETiRet;
+}
+
+
+/* Encode a JSON value and add it to provided string. Note that
+ * the string object may be NULL. In this case, it is created
+ * if and only if escaping is needed. if escapeAll is false, previously
+ * escaped strings are left as is
+ */
+static rsRetVal ATTR_NONNULL(1, 3)
+jsonAddVal(uchar *const pSrc, const unsigned buflen, es_str_t **dst, const int escapeAll)
+{
+	es_size_t i;
+	DEFiRet;
+
+	for(i = 0 ; i < buflen ; ++i) {
+		const uchar c = pSrc[i];
+		if(! (   (c >= 0x30 && c <= 0x5b)
+		      || (c >= 0x23 && c <= 0x2e)
+		      || (c >= 0x5d /* && c <= 0x10FFFF*/)
+		      || c == 0x20 || c == 0x21)
+		     ) {
+			iRet = jsonAddVal_escaped(pSrc, buflen, i, dst, escapeAll);
+			FINALIZE;
+		}
+	}
+	if(*dst != NULL) {
+		es_addBuf(dst, (const char *) pSrc, buflen);
+	}
 finalize_it:
 	RETiRet;
 }