diff --git a/plugins/imdiag/imdiag.c b/plugins/imdiag/imdiag.c index 607d51e08..a10b20f02 100644 --- a/plugins/imdiag/imdiag.c +++ b/plugins/imdiag/imdiag.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #if HAVE_FCNTL_H #include #endif @@ -55,6 +57,7 @@ #include "queue.h" #include "lookup.h" #include "net.h" /* for permittedPeers, may be removed when this is removed */ +#include "statsobj.h" MODULE_TYPE_INPUT MODULE_TYPE_NOKEEP @@ -68,6 +71,7 @@ DEFobjCurrIf(netstrm) DEFobjCurrIf(errmsg) DEFobjCurrIf(datetime) DEFobjCurrIf(prop) +DEFobjCurrIf(statsobj) /* Module static data */ static tcpsrv_t *pOurTcpsrv = NULL; /* our TCP server(listener) TODO: change for multiple instances */ @@ -76,6 +80,18 @@ static prop_t *pInputName = NULL; /* there is only one global inputName for all static prop_t *pRcvDummy = NULL; static prop_t *pRcvIPDummy = NULL; +statsobj_t *diagStats; +STATSCOUNTER_DEF(potentialArtificialDelayMs, mutPotentialArtificialDelayMs) +STATSCOUNTER_DEF(actualArtificialDelayMs, mutActualArtificialDelayMs) +STATSCOUNTER_DEF(delayInvocationCount, mutDelayInvocationCount) + +static sem_t statsReportingBlocker; +static long statsReportingBlockStartTimeMs = 0; +static int allowOnlyOnce = 0; +DEF_ATOMIC_HELPER_MUT(mutAllowOnlyOnce); +pthread_mutex_t mutStatsReporterWatch; +pthread_cond_t statsReporterWatch; +int statsReported = 0; /* config settings */ struct modConfData_s { @@ -334,6 +350,96 @@ finalize_it: RETiRet; } +static void +imdiag_statsReadCallback(statsobj_t __attribute__((unused)) *ignore_stats, + void __attribute__((unused)) *ignore_ctx) { + long waitStartTimeMs = currentTimeMills(); + sem_wait(&statsReportingBlocker); + long delta = currentTimeMills() - waitStartTimeMs; + if (ATOMIC_DEC_AND_FETCH(&allowOnlyOnce, &mutAllowOnlyOnce) < 0) { + sem_post(&statsReportingBlocker); + } else { + errmsg.LogError(0, RS_RET_OK, "imdiag(stats-read-callback): current stats-reporting " + "cycle will proceed now, next reporting cycle will again be blocked"); + } + + if (pthread_mutex_lock(&mutStatsReporterWatch) == 0) { + statsReported = 1; + pthread_cond_signal(&statsReporterWatch); + pthread_mutex_unlock(&mutStatsReporterWatch); + } + + if (delta > 0) { + STATSCOUNTER_ADD(actualArtificialDelayMs, mutActualArtificialDelayMs, delta); + } +} + +static rsRetVal +blockStatsReporting(tcps_sess_t *pSess) { + DEFiRet; + + sem_wait(&statsReportingBlocker); + CHKiConcCtrl(pthread_mutex_lock(&mutStatsReporterWatch)); + statsReported = 0; + CHKiConcCtrl(pthread_mutex_unlock(&mutStatsReporterWatch)); + ATOMIC_STORE_0_TO_INT(&allowOnlyOnce, &mutAllowOnlyOnce); + statsReportingBlockStartTimeMs = currentTimeMills(); + errmsg.LogError(0, RS_RET_OK, "imdiag: blocked stats reporting"); + CHKiRet(sendResponse(pSess, "next stats reporting call will be blocked\n")); + +finalize_it: + if (iRet != RS_RET_OK) { + errmsg.LogError(0, iRet, "imdiag: block-stats-reporting wasn't successful"); + CHKiRet(sendResponse(pSess, "imdiag::error something went wrong\n")); + } + RETiRet; +} + +static rsRetVal +awaitStatsReport(uchar *pszCmd, tcps_sess_t *pSess) { + uchar subCmd[1024]; + int blockAgain = 0; + DEFiRet; + + getFirstWord(&pszCmd, subCmd, sizeof(subCmd), TO_LOWERCASE); + blockAgain = (ustrcmp(UCHAR_CONSTANT("block_again"), subCmd) == 0); + if (statsReportingBlockStartTimeMs > 0) { + long delta = currentTimeMills() - statsReportingBlockStartTimeMs; + if (blockAgain) { + ATOMIC_STORE_1_TO_INT(&allowOnlyOnce, &mutAllowOnlyOnce); + errmsg.LogError(0, RS_RET_OK, "imdiag: un-blocking ONLY the next cycle of stats reporting"); + } else { + statsReportingBlockStartTimeMs = 0; + errmsg.LogError(0, RS_RET_OK, "imdiag: un-blocking stats reporting"); + } + sem_post(&statsReportingBlocker); + errmsg.LogError(0, RS_RET_OK, "imdiag: stats reporting unblocked"); + STATSCOUNTER_ADD(potentialArtificialDelayMs, mutPotentialArtificialDelayMs, delta); + STATSCOUNTER_INC(delayInvocationCount, mutDelayInvocationCount); + errmsg.LogError(0, RS_RET_OK, "imdiag: will now await next reporting cycle"); + CHKiConcCtrl(pthread_mutex_lock(&mutStatsReporterWatch)); + while (! statsReported) { + CHKiConcCtrl(pthread_cond_wait(&statsReporterWatch, &mutStatsReporterWatch)); + } + statsReported = 0; + CHKiConcCtrl(pthread_mutex_unlock(&mutStatsReporterWatch)); + if (blockAgain) { + statsReportingBlockStartTimeMs = currentTimeMills(); + } + errmsg.LogError(0, RS_RET_OK, "imdiag: stats were reported, wait complete, returning"); + CHKiRet(sendResponse(pSess, "stats reporting was unblocked\n")); + } else { + CHKiRet(sendResponse(pSess, "imdiag::error : stats reporting was not blocked, bug?\n")); + } + +finalize_it: + if (iRet != RS_RET_OK) { + errmsg.LogError(0, iRet, "imdiag: stats-reporting unblock + await-run wasn't successfully completed"); + CHKiRet(sendResponse(pSess, "imdiag::error something went wrong\n")); + } + RETiRet; +} + /* Function to handle received messages. This is our core function! * rgerhards, 2009-05-24 */ @@ -369,6 +475,10 @@ OnMsgReceived(tcps_sess_t *pSess, uchar *pRcv, int iLenMsg) CHKiRet(awaitLookupTableReload(pSess)); } else if(!ustrcmp(cmdBuf, UCHAR_CONSTANT("injectmsg"))) { CHKiRet(injectMsg(pszMsg, pSess)); + } else if(!ustrcmp(cmdBuf, UCHAR_CONSTANT("blockstatsreporting"))) { + CHKiRet(blockStatsReporting(pSess)); + } else if(!ustrcmp(cmdBuf, UCHAR_CONSTANT("awaitstatsreport"))) { + CHKiRet(awaitStatsReport(pszMsg, pSess)); } else { dbgprintf("imdiag unkown command '%s'\n", cmdBuf); CHKiRet(sendResponse(pSess, "unkown command '%s'\n", cmdBuf)); @@ -517,6 +627,12 @@ CODESTARTmodExit /* free some globals to keep valgrind happy */ free(pszInputName); + statsobj.Destruct(&diagStats); + sem_destroy(&statsReportingBlocker); + DESTROY_ATOMIC_HELPER_MUT(mutAllowOnlyOnce); + pthread_cond_destroy(&statsReporterWatch); + pthread_mutex_destroy(&mutStatsReporterWatch); + /* release objects we used */ objRelease(net, LM_NET_FILENAME); objRelease(netstrm, LM_NETSTRMS_FILENAME); @@ -525,6 +641,7 @@ CODESTARTmodExit objRelease(errmsg, CORE_COMPONENT); objRelease(datetime, CORE_COMPONENT); objRelease(prop, CORE_COMPONENT); + objRelease(statsobj, CORE_COMPONENT); ENDmodExit @@ -570,6 +687,7 @@ CODEmodInit_QueryRegCFSLineHdlr CHKiRet(objUse(errmsg, CORE_COMPONENT)); CHKiRet(objUse(datetime, CORE_COMPONENT)); CHKiRet(objUse(prop, CORE_COMPONENT)); + CHKiRet(objUse(statsobj, CORE_COMPONENT)); /* register config file handlers */ CHKiRet(omsdRegCFSLineHdlr(UCHAR_CONSTANT("imdiagserverrun"), 0, eCmdHdlrGetWord, @@ -585,7 +703,28 @@ CODEmodInit_QueryRegCFSLineHdlr CHKiRet(omsdRegCFSLineHdlr(UCHAR_CONSTANT("imdiagserverinputname"), 0, eCmdHdlrGetWord, NULL, &pszInputName, STD_LOADABLE_MODULE_ID)); CHKiRet(omsdRegCFSLineHdlr(UCHAR_CONSTANT("resetconfigvariables"), 1, eCmdHdlrCustomHandler, - resetConfigVariables, NULL, STD_LOADABLE_MODULE_ID)); + resetConfigVariables, NULL, STD_LOADABLE_MODULE_ID)); + + sem_init(&statsReportingBlocker, 0, 1); + INIT_ATOMIC_HELPER_MUT(mutAllowOnlyOnce); + CHKiConcCtrl(pthread_mutex_init(&mutStatsReporterWatch, NULL)); + CHKiConcCtrl(pthread_cond_init(&statsReporterWatch, NULL)); + + CHKiRet(statsobj.Construct(&diagStats)); + CHKiRet(statsobj.SetName(diagStats, UCHAR_CONSTANT("imdiag-stats-reporting-controller"))); + CHKiRet(statsobj.SetOrigin(diagStats, UCHAR_CONSTANT("imdiag"))); + statsobj.SetStatsObjFlags(diagStats, STATSOBJ_FLAG_DO_PREPEND); + STATSCOUNTER_INIT(potentialArtificialDelayMs, mutPotentialArtificialDelayMs); + CHKiRet(statsobj.AddCounter(diagStats, UCHAR_CONSTANT("potentialTotalArtificialDelayInMs"), + ctrType_IntCtr, CTR_FLAG_NONE, &potentialArtificialDelayMs)); + STATSCOUNTER_INIT(actualArtificialDelayMs, mutActualArtificialDelayMs); + CHKiRet(statsobj.AddCounter(diagStats, UCHAR_CONSTANT("actualTotalArtificialDelayInMs"), + ctrType_IntCtr, CTR_FLAG_NONE, &actualArtificialDelayMs)); + STATSCOUNTER_INIT(delayInvocationCount, mutDelayInvocationCount); + CHKiRet(statsobj.AddCounter(diagStats, UCHAR_CONSTANT("delayInvocationCount"), + ctrType_IntCtr, CTR_FLAG_NONE, &delayInvocationCount)); + CHKiRet(statsobj.SetReadNotifier(diagStats, imdiag_statsReadCallback, NULL)); + CHKiRet(statsobj.ConstructFinalize(diagStats)); ENDmodInit diff --git a/runtime/dynstats.c b/runtime/dynstats.c index a4d34328b..167069fd0 100644 --- a/runtime/dynstats.c +++ b/runtime/dynstats.c @@ -70,30 +70,31 @@ finalize_it: } static inline void -dynstats_destroyCtr(dynstats_bucket_t *b, dynstats_ctr_t *ctr, uint8_t destructStatsCtr) { - if (destructStatsCtr) { - statsobj.DestructCounter(b->stats, ctr->pCtr); - } +dynstats_destroyCtr(dynstats_ctr_t *ctr) { + statsobj.DestructUnlinkedCounter(ctr->pCtr); free(ctr->metric); free(ctr); } static inline void /* assumes exclusive access to bucket */ -dynstats_destroyCounters(dynstats_bucket_t *b) { +dynstats_destroyCountersIn(dynstats_bucket_t *b, htable *table, dynstats_ctr_t *ctrs) { dynstats_ctr_t *ctr; - - hashtable_destroy(b->table, 0); - statsobj.DestructAllCounters(b->stats); - while(1) { - ctr = b->ctrs; - if (ctr == NULL) { - break; - } else { - b->ctrs = ctr->next; - dynstats_destroyCtr(b, ctr, 0); - } + int ctrs_purged = 0; + hashtable_destroy(table, 0); + while (ctrs != NULL) { + ctr = ctrs; + ctrs = ctrs->next; + dynstats_destroyCtr(ctr); + ctrs_purged++; } - STATSCOUNTER_BUMP(b->ctrMetricsPurged, b->mutCtrMetricsPurged, b->metricCount); + STATSCOUNTER_ADD(b->ctrMetricsPurged, b->mutCtrMetricsPurged, ctrs_purged); + ATOMIC_SUB(&b->metricCount, ctrs_purged, &b->mutMetricCount); +} + +static inline void /* assumes exclusive access to bucket */ +dynstats_destroyCounters(dynstats_bucket_t *b) { + statsobj.UnlinkAllCounters(b->stats); + dynstats_destroyCountersIn(b, b->table, b->ctrs); } void @@ -104,6 +105,7 @@ dynstats_destroyBucket(dynstats_bucket_t* b) { pthread_rwlock_wrlock(&b->lock); dynstats_destroyCounters(b); + dynstats_destroyCountersIn(b, b->survivor_table, b->survivor_ctrs); statsobj.Destruct(&b->stats); free(b->name); pthread_rwlock_unlock(&b->lock); @@ -137,37 +139,37 @@ dynstats_addBucketMetrics(dynstats_buckets_t *bkts, dynstats_bucket_t *b, const ustrncpy(metric_suffix, suffix_litteral, DYNSTATS_MAX_BUCKET_NS_METRIC_LENGTH); STATSCOUNTER_INIT(b->ctrOpsOverflow, b->mutCtrOpsOverflow); CHKiRet(statsobj.AddManagedCounter(bkts->global_stats, metric_name_buff, ctrType_IntCtr, - CTR_FLAG_RESETTABLE, &(b->ctrOpsOverflow), &b->pOpsOverflowCtr)); + CTR_FLAG_RESETTABLE, &(b->ctrOpsOverflow), &b->pOpsOverflowCtr, 1)); suffix_litteral = UCHAR_CONSTANT("new_metric_add"); ustrncpy(metric_suffix, suffix_litteral, DYNSTATS_MAX_BUCKET_NS_METRIC_LENGTH); STATSCOUNTER_INIT(b->ctrNewMetricAdd, b->mutCtrNewMetricAdd); CHKiRet(statsobj.AddManagedCounter(bkts->global_stats, metric_name_buff, ctrType_IntCtr, - CTR_FLAG_RESETTABLE, &(b->ctrNewMetricAdd), &b->pNewMetricAddCtr)); + CTR_FLAG_RESETTABLE, &(b->ctrNewMetricAdd), &b->pNewMetricAddCtr, 1)); suffix_litteral = UCHAR_CONSTANT("no_metric"); ustrncpy(metric_suffix, suffix_litteral, DYNSTATS_MAX_BUCKET_NS_METRIC_LENGTH); STATSCOUNTER_INIT(b->ctrNoMetric, b->mutCtrNoMetric); CHKiRet(statsobj.AddManagedCounter(bkts->global_stats, metric_name_buff, ctrType_IntCtr, - CTR_FLAG_RESETTABLE, &(b->ctrNoMetric), &b->pNoMetricCtr)); + CTR_FLAG_RESETTABLE, &(b->ctrNoMetric), &b->pNoMetricCtr, 1)); suffix_litteral = UCHAR_CONSTANT("metrics_purged"); ustrncpy(metric_suffix, suffix_litteral, DYNSTATS_MAX_BUCKET_NS_METRIC_LENGTH); STATSCOUNTER_INIT(b->ctrMetricsPurged, b->mutCtrMetricsPurged); CHKiRet(statsobj.AddManagedCounter(bkts->global_stats, metric_name_buff, ctrType_IntCtr, - CTR_FLAG_RESETTABLE, &(b->ctrMetricsPurged), &b->pMetricsPurgedCtr)); + CTR_FLAG_RESETTABLE, &(b->ctrMetricsPurged), &b->pMetricsPurgedCtr, 1)); suffix_litteral = UCHAR_CONSTANT("ops_ignored"); ustrncpy(metric_suffix, suffix_litteral, DYNSTATS_MAX_BUCKET_NS_METRIC_LENGTH); STATSCOUNTER_INIT(b->ctrOpsIgnored, b->mutCtrOpsIgnored); CHKiRet(statsobj.AddManagedCounter(bkts->global_stats, metric_name_buff, ctrType_IntCtr, - CTR_FLAG_RESETTABLE, &(b->ctrOpsIgnored), &b->pOpsIgnoredCtr)); + CTR_FLAG_RESETTABLE, &(b->ctrOpsIgnored), &b->pOpsIgnoredCtr, 1)); suffix_litteral = UCHAR_CONSTANT("purge_triggered"); ustrncpy(metric_suffix, suffix_litteral, DYNSTATS_MAX_BUCKET_NS_METRIC_LENGTH); STATSCOUNTER_INIT(b->ctrPurgeTriggered, b->mutCtrPurgeTriggered); CHKiRet(statsobj.AddManagedCounter(bkts->global_stats, metric_name_buff, ctrType_IntCtr, - CTR_FLAG_RESETTABLE, &(b->ctrPurgeTriggered), &b->pPurgeTriggeredCtr)); + CTR_FLAG_RESETTABLE, &(b->ctrPurgeTriggered), &b->pPurgeTriggeredCtr, 1)); finalize_it: free(metric_name_buff); @@ -187,7 +189,7 @@ finalize_it: if (b->pOpsIgnoredCtr != NULL) { statsobj.DestructCounter(bkts->global_stats, b->pOpsIgnoredCtr); } - if (b->pPurgeTriggeredCtr != NULL) { + if (b->pPurgeTriggeredCtr != NULL) { statsobj.DestructCounter(bkts->global_stats, b->pPurgeTriggeredCtr); } } @@ -197,29 +199,54 @@ finalize_it: static void no_op_free(void __attribute__((unused)) *ignore) {} -static rsRetVal -dynstats_resetBucket(dynstats_bucket_t *b, uint8_t do_purge) { +static rsRetVal /* assumes exclusive access to bucket */ +dynstats_rebuildSurvivorTable(dynstats_bucket_t *b) { + htable *survivor_table = NULL; + htable *new_table = NULL; size_t htab_sz; DEFiRet; + htab_sz = (size_t) (DYNSTATS_HASHTABLE_SIZE_OVERPROVISIONING * b->maxCardinality + 1); + if (b->table == NULL) { + CHKmalloc(survivor_table = create_hashtable(htab_sz, hash_from_string, key_equals_string, no_op_free)); + } + CHKmalloc(new_table = create_hashtable(htab_sz, hash_from_string, key_equals_string, no_op_free)); + statsobj.UnlinkAllCounters(b->stats); + if (b->survivor_table != NULL) { + dynstats_destroyCountersIn(b, b->survivor_table, b->survivor_ctrs); + } + b->survivor_table = (b->table == NULL) ? survivor_table : b->table; + b->survivor_ctrs = b->ctrs; + b->table = new_table; + b->ctrs = NULL; +finalize_it: + if (iRet != RS_RET_OK) { + errmsg.LogError(errno, RS_RET_INTERNAL_ERROR, "error trying to evict TTL-expired metrics of dyn-stats bucket named: %s", b->name); + if (new_table == NULL) { + errmsg.LogError(errno, RS_RET_INTERNAL_ERROR, "error trying to initialize hash-table for dyn-stats bucket named: %s", b->name); + } else { + hashtable_destroy(new_table, 0); + } + if (b->table == NULL) { + if (survivor_table == NULL) { + errmsg.LogError(errno, RS_RET_INTERNAL_ERROR, "error trying to initialize ttl-survivor hash-table for dyn-stats bucket named: %s", b->name); + } else { + hashtable_destroy(survivor_table, 0); + } + } + } + RETiRet; +} + +static rsRetVal +dynstats_resetBucket(dynstats_bucket_t *b) { + DEFiRet; pthread_rwlock_wrlock(&b->lock); - if (do_purge) { - dynstats_destroyCounters(b); - } - ATOMIC_STORE_0_TO_INT(&b->metricCount, &b->mutMetricCount); + CHKiRet(dynstats_rebuildSurvivorTable(b)); STATSCOUNTER_INC(b->ctrPurgeTriggered, b->mutCtrPurgeTriggered); - b->ctrs = NULL; - if ((b->table = create_hashtable(htab_sz, hash_from_string, key_equals_string, no_op_free)) == NULL) { - errmsg.LogError(errno, RS_RET_INTERNAL_ERROR, "error trying to initialize hash-table for dyn-stats bucket named: %s", b->name); - ABORT_FINALIZE(RS_RET_INTERNAL_ERROR); - } - timeoutComp(&b->metricCleanupTimeout, b->unusedMetricLife); finalize_it: pthread_rwlock_unlock(&b->lock); - if (iRet != RS_RET_OK) { - statsobj.Destruct(&b->stats); - } RETiRet; } @@ -231,7 +258,7 @@ dynstats_resetIfExpired(dynstats_bucket_t *b) { pthread_rwlock_unlock(&b->lock); if (timeout == 0) { errmsg.LogMsg(0, RS_RET_TIMED_OUT, LOG_INFO, "dynstats: bucket '%s' is being reset", b->name); - dynstats_resetBucket(b, 1); + dynstats_resetBucket(b); } } @@ -292,17 +319,17 @@ dynstats_newBucket(const uchar* name, uint8_t resettable, uint32_t maxCardinalit CHKiRet(dynstats_initNewBucketStats(b)); - CHKiRet(dynstats_resetBucket(b, 0)); + CHKiRet(dynstats_resetBucket(b)); CHKiRet(dynstats_addBucketMetrics(bkts, b, name)); pthread_rwlock_wrlock(&bkts->lock); - if (bkts->list == NULL) { - bkts->list = b; - } else { - b->next = bkts->list; - bkts->list = b; - } + if (bkts->list == NULL) { + bkts->list = b; + } else { + b->next = bkts->list; + bkts->list = b; + } pthread_rwlock_unlock(&bkts->lock); } else { errmsg.LogError(0, RS_RET_INTERNAL_ERROR, "dynstats: bucket creation failed, as global-initialization of buckets was unsuccessful"); @@ -317,8 +344,7 @@ finalize_it: pthread_rwlock_destroy(&b->lock); } if (b != NULL) { - free(b->name); - free(b); + dynstats_destroyBucket(b); } } RETiRet; @@ -355,9 +381,9 @@ dynstats_processCnf(struct cnfobj *o) { "param '%s'\n", modpblk.descr[i].name); } } - if (name != NULL) { - CHKiRet(dynstats_newBucket(name, resettable, maxCardinality, unusedMetricLife)); - } + if (name != NULL) { + CHKiRet(dynstats_newBucket(name, resettable, maxCardinality, unusedMetricLife)); + } finalize_it: free(name); @@ -400,7 +426,7 @@ dynstats_destroyAllBuckets() { if (b == NULL) { break; } else { - bkts->list = b->next; + bkts->list = b->next; dynstats_destroyBucket(b); } } @@ -416,13 +442,13 @@ dynstats_findBucket(const uchar* name) { bkts = &loadConf->dynstats_buckets; if (bkts->initialized) { pthread_rwlock_rdlock(&bkts->lock); - b = bkts->list; - while(b != NULL) { - if (! ustrcmp(name, b->name)) { + b = bkts->list; + while(b != NULL) { + if (! ustrcmp(name, b->name)) { break; } - b = b->next; - } + b = b->next; + } pthread_rwlock_unlock(&bkts->lock); } else { b = NULL; @@ -441,14 +467,14 @@ dynstats_createCtr(dynstats_bucket_t *b, const uchar* metric, dynstats_ctr_t **c STATSCOUNTER_INIT((*ctr)->ctr, (*ctr)->mutCtr); CHKiRet(statsobj.AddManagedCounter(b->stats, metric, ctrType_IntCtr, b->resettable ? CTR_FLAG_MUST_RESET : CTR_FLAG_NONE, - &(*ctr)->ctr, &(*ctr)->pCtr)); + &(*ctr)->ctr, &(*ctr)->pCtr, 0)); finalize_it: if (iRet != RS_RET_OK) { - if ((*ctr) != NULL) { - free((*ctr)->metric); - free(*ctr); - *ctr = NULL; - } + if ((*ctr) != NULL) { + free((*ctr)->metric); + free(*ctr); + *ctr = NULL; + } } RETiRet; } @@ -456,7 +482,7 @@ finalize_it: static rsRetVal dynstats_addNewCtr(dynstats_bucket_t *b, const uchar* metric, uint8_t doInitialIncrement) { dynstats_ctr_t *ctr; - dynstats_ctr_t *found_ctr; + dynstats_ctr_t *found_ctr, *survivor_ctr, *effective_ctr; int created; uchar *copy_of_key = NULL; DEFiRet; @@ -471,25 +497,42 @@ dynstats_addNewCtr(dynstats_bucket_t *b, const uchar* metric, uint8_t doInitialI CHKiRet(dynstats_createCtr(b, metric, &ctr)); pthread_rwlock_wrlock(&b->lock); - found_ctr = (dynstats_ctr_t*) hashtable_search(b->table, ctr->metric); + found_ctr = (dynstats_ctr_t*) hashtable_search(b->table, ctr->metric); if (found_ctr != NULL) { if (doInitialIncrement) { STATSCOUNTER_INC(found_ctr->ctr, found_ctr->mutCtr); } } else { - copy_of_key = ustrdup(ctr->metric); - if (copy_of_key != NULL) { - created = hashtable_insert(b->table, copy_of_key, ctr); - } + copy_of_key = ustrdup(ctr->metric); + if (copy_of_key != NULL) { + survivor_ctr = (dynstats_ctr_t*) hashtable_search(b->survivor_table, ctr->metric); + if (survivor_ctr == NULL) { + effective_ctr = ctr; + } else { + effective_ctr = survivor_ctr; + if (survivor_ctr->prev != NULL) { + survivor_ctr->prev->next = survivor_ctr->next; + } + if (survivor_ctr->next != NULL) { + survivor_ctr->next->prev = survivor_ctr->prev; + } + if (survivor_ctr == b->survivor_ctrs) { + b->survivor_ctrs = survivor_ctr->next; + } + } + if ((created = hashtable_insert(b->table, copy_of_key, effective_ctr))) { + statsobj.AddPreCreatedCtr(b->stats, effective_ctr->pCtr); + } + } if (created) { - if (b->ctrs == NULL) { - b->ctrs = ctr; - } else { - ctr->next = b->ctrs; - b->ctrs = ctr; - } + if (b->ctrs != NULL) { + b->ctrs->prev = effective_ctr; + } + effective_ctr->prev = NULL; + effective_ctr->next = b->ctrs; + b->ctrs = effective_ctr; if (doInitialIncrement) { - STATSCOUNTER_INC(ctr->ctr, ctr->mutCtr); + STATSCOUNTER_INC(effective_ctr->ctr, effective_ctr->mutCtr); } } } @@ -497,19 +540,19 @@ dynstats_addNewCtr(dynstats_bucket_t *b, const uchar* metric, uint8_t doInitialI if (found_ctr != NULL) { //ignore - } else if (created) { + } else if (created && (effective_ctr != survivor_ctr)) { ATOMIC_INC(&b->metricCount, &b->mutMetricCount); STATSCOUNTER_INC(b->ctrNewMetricAdd, b->mutCtrNewMetricAdd); - } else { - if (copy_of_key != NULL) { - free(copy_of_key); - } + } else if (! created) { + if (copy_of_key != NULL) { + free(copy_of_key); + } ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); } finalize_it: - if ((! created) && (ctr != NULL)) { - dynstats_destroyCtr(b, ctr, 1); + if (((! created) || (effective_ctr != ctr)) && (ctr != NULL)) { + dynstats_destroyCtr(ctr); } RETiRet; } @@ -529,7 +572,7 @@ dynstats_inc(dynstats_bucket_t *b, uchar* metric) { } if (pthread_rwlock_tryrdlock(&b->lock) == 0) { - ctr = (dynstats_ctr_t *) hashtable_search(b->table, metric); + ctr = (dynstats_ctr_t *) hashtable_search(b->table, metric); if (ctr != NULL) { STATSCOUNTER_INC(ctr->ctr, ctr->mutCtr); } diff --git a/runtime/dynstats.h b/runtime/dynstats.h index e41e31e59..b4a13966c 100644 --- a/runtime/dynstats.h +++ b/runtime/dynstats.h @@ -26,7 +26,9 @@ struct dynstats_ctr_s { STATSCOUNTER_DEF(ctr, mutCtr); ctr_t *pCtr; uchar *metric; - struct dynstats_ctr_s *next; /* linked list ptr */ + /* linked list ptr */ + struct dynstats_ctr_s *next; + struct dynstats_ctr_s *prev; }; struct dynstats_bucket_s { @@ -48,6 +50,11 @@ struct dynstats_bucket_s { ctr_t *pPurgeTriggeredCtr; struct dynstats_bucket_s *next; /* linked list ptr */ struct dynstats_ctr_s *ctrs; + /*survivor objects are used to keep counter values around for upto unused-ttl duration, + so in case it is accessed within (ttl - 2 * ttl) time-period we can re-store the accumulator value from this */ + struct dynstats_ctr_s *survivor_ctrs; + htable *survivor_table; + uint32_t maxCardinality; uint32_t metricCount; pthread_mutex_t mutMetricCount; diff --git a/runtime/srUtils.h b/runtime/srUtils.h index 448fc6b71..36ecfc7dc 100644 --- a/runtime/srUtils.h +++ b/runtime/srUtils.h @@ -96,6 +96,7 @@ int containsGlobWildcard(char *str); void seedRandomNumber(); #define MAX_RANDOM_NUMBER RAND_MAX long int randomNumber(); +long currentTimeMills(); /* mutex operations */ /* some useful constants */ diff --git a/runtime/srutils.c b/runtime/srutils.c index 8b3e6cd58..f9a27514a 100644 --- a/runtime/srutils.c +++ b/runtime/srutils.c @@ -428,6 +428,25 @@ timeoutComp(struct timespec *pt, long iTimeout) return RS_RET_OK; /* so far, this is static... */ } +long +currentTimeMills() { +# if _POSIX_TIMERS > 0 + struct timespec tm; +# else + struct timeval tv; +# endif + +# if _POSIX_TIMERS > 0 + clock_gettime(CLOCK_REALTIME, &tm); +# else + gettimeofday(&tv, NULL); + tm.tv_sec = tv.tv_sec; + tm.tv_nsec = tv.tv_usec * 1000; +# endif + + return tm.tv_sec * 1000 + (tm.tv_sec / 1000000); +} + /* This function is kind of the reverse of timeoutComp() - it takes an absolute * timeout value and computes how far this is in the future. If the value is already diff --git a/runtime/statsobj.c b/runtime/statsobj.c index d350ea46a..4ded2bfa9 100644 --- a/runtime/statsobj.c +++ b/runtime/statsobj.c @@ -68,12 +68,22 @@ static inline void addToObjList(statsobj_t *pThis) { pthread_mutex_lock(&mutStats); - pThis->prev = objLast; - if(objLast != NULL) - objLast->next = pThis; - objLast = pThis; - if(objRoot == NULL) + if (pThis->flags && STATSOBJ_FLAG_DO_PREPEND) { + pThis->next = objRoot; + if (objRoot != NULL) { + objRoot->prev = pThis; + } objRoot = pThis; + if (objLast == NULL) + objLast = pThis; + } else { + pThis->prev = objLast; + if(objLast != NULL) + objLast->next = pThis; + objLast = pThis; + if(objRoot == NULL) + objRoot = pThis; + } pthread_mutex_unlock(&mutStats); } @@ -117,6 +127,7 @@ BEGINobjConstruct(statsobj) /* be sure to specify the object type also in END ma pThis->ctrLast = NULL; pThis->ctrRoot = NULL; pThis->read_notifier = NULL; + pThis->flags = 0; ENDobjConstruct(statsobj) @@ -169,6 +180,11 @@ finalize_it: RETiRet; } +static void +setStatsObjFlags(statsobj_t *pThis, int flags) { + pThis->flags = flags; +} + static rsRetVal setReportingNamespace(statsobj_t *pThis, uchar *ns) { @@ -186,7 +202,7 @@ finalize_it: * is called. */ static rsRetVal -addManagedCounter(statsobj_t *pThis, const uchar *ctrName, statsCtrType_t ctrType, int8_t flags, void *pCtr, ctr_t **entryRef) +addManagedCounter(statsobj_t *pThis, const uchar *ctrName, statsCtrType_t ctrType, int8_t flags, void *pCtr, ctr_t **entryRef, int8_t linked) { ctr_t *ctr; DEFiRet; @@ -210,7 +226,9 @@ addManagedCounter(statsobj_t *pThis, const uchar *ctrName, statsCtrType_t ctrTyp ctr->val.pInt = (int*) pCtr; break; } - addCtrToList(pThis, ctr); + if (linked) { + addCtrToList(pThis, ctr); + } *entryRef = ctr; finalize_it: @@ -223,21 +241,33 @@ finalize_it: RETiRet; } +static inline void +addPreCreatedCounter(statsobj_t *pThis, ctr_t *pCtr) +{ + pCtr->next = NULL; + pCtr->prev = NULL; + addCtrToList(pThis, pCtr); +} + static rsRetVal addCounter(statsobj_t *pThis, const uchar *ctrName, statsCtrType_t ctrType, int8_t flags, void *pCtr) { ctr_t *ctr; DEFiRet; - CHKiRet(addManagedCounter(pThis, ctrName, ctrType, flags, pCtr, &ctr)); + CHKiRet(addManagedCounter(pThis, ctrName, ctrType, flags, pCtr, &ctr, 1)); finalize_it: RETiRet; } -static rsRetVal +static void +destructUnlinkedCounter(ctr_t *ctr) { + free(ctr->name); + free(ctr); +} + +static void destructCounter(statsobj_t *pThis, ctr_t *pCtr) { - DEFiRet; - pthread_mutex_lock(&pThis->mutCtr); if (pCtr->prev != NULL) { pCtr->prev->next = pCtr->next; @@ -252,10 +282,7 @@ destructCounter(statsobj_t *pThis, ctr_t *pCtr) pThis->ctrRoot = pCtr->next; } pthread_mutex_unlock(&pThis->mutCtr); - free(pCtr->name); - free(pCtr); - - RETiRet; + destructUnlinkedCounter(pCtr); } static inline void @@ -524,7 +551,6 @@ finalize_it: RETiRet; } - /* Enable statistics gathering. currently there is no function to disable it * again, as this is right now not needed. */ @@ -578,24 +604,26 @@ finalize_it: RETiRet; } - -static rsRetVal -destructAllCounters(statsobj_t *pThis) { - DEFiRet; - ctr_t *ctr, *ctrToDel; - +static ctr_t* +unlinkAllCounters(statsobj_t *pThis) { + ctr_t *ctr; + pthread_mutex_lock(&pThis->mutCtr); ctr = pThis->ctrRoot; + pThis->ctrLast = NULL; + pThis->ctrRoot = NULL; + pthread_mutex_unlock(&pThis->mutCtr); + return ctr; +} + +static void +destructUnlinkedCounters(ctr_t *ctr) { + ctr_t *ctrToDel; + while(ctr != NULL) { ctrToDel = ctr; ctr = ctr->next; - free(ctrToDel->name); - free(ctrToDel); + destructUnlinkedCounter(ctrToDel); } - - pThis->ctrLast = NULL; - pThis->ctrRoot = NULL; - - RETiRet; } /* check if a sender has not sent info to us for an extended period @@ -644,7 +672,7 @@ CODESTARTobjDestruct(statsobj) removeFromObjList(pThis); /* destruct counters */ - CHKiRet(destructAllCounters(pThis)); + destructUnlinkedCounters(unlinkAllCounters(pThis)); pthread_mutex_destroy(&pThis->mutCtr); free(pThis->name); @@ -681,11 +709,14 @@ CODESTARTobjQueryInterface(statsobj) pIf->SetOrigin = setOrigin; pIf->SetReadNotifier = setReadNotifier; pIf->SetReportingNamespace = setReportingNamespace; + pIf->SetStatsObjFlags = setStatsObjFlags; pIf->GetAllStatsLines = getAllStatsLines; pIf->AddCounter = addCounter; pIf->AddManagedCounter = addManagedCounter; + pIf->AddPreCreatedCtr = addPreCreatedCounter; pIf->DestructCounter = destructCounter; - pIf->DestructAllCounters = destructAllCounters; + pIf->DestructUnlinkedCounter = destructUnlinkedCounter; + pIf->UnlinkAllCounters = unlinkAllCounters; pIf->EnableStats = enableStats; finalize_it: ENDobjQueryInterface(statsobj) diff --git a/runtime/statsobj.h b/runtime/statsobj.h index 4f47933ed..dd26c0e27 100644 --- a/runtime/statsobj.h +++ b/runtime/statsobj.h @@ -56,6 +56,10 @@ typedef enum statsFmtType_e { #define CTR_FLAG_RESETTABLE 1 #define CTR_FLAG_MUST_RESET 2 +/* statsobj flags */ +#define STATSOBJ_FLAG_NONE 0 +#define STATSOBJ_FLAG_DO_PREPEND 1 + /* helper entity, the counter */ typedef struct ctr_s { uchar *name; @@ -79,6 +83,7 @@ struct statsobj_s { pthread_mutex_t mutCtr; /* to guard counter linked-list ops */ ctr_t *ctrRoot; /* doubly-linked list of statsobj counters */ ctr_t *ctrLast; + int flags; /* used to link ourselves together */ statsobj_t *prev; statsobj_t *next; @@ -101,12 +106,15 @@ BEGINinterface(statsobj) /* name must also be changed in ENDinterface macro! */ rsRetVal (*SetOrigin)(statsobj_t *pThis, uchar *name); /* added v12, 2014-09-08 */ rsRetVal (*SetReadNotifier)(statsobj_t *pThis, statsobj_read_notifier_t notifier, void* ctx); rsRetVal (*SetReportingNamespace)(statsobj_t *pThis, uchar *ns); + void (*SetStatsObjFlags)(statsobj_t *pThis, int flags); //rsRetVal (*GetStatsLine)(statsobj_t *pThis, cstr_t **ppcstr); rsRetVal (*GetAllStatsLines)(rsRetVal(*cb)(void*, cstr_t*), void *usrptr, statsFmtType_t fmt, int8_t bResetCtr); rsRetVal (*AddCounter)(statsobj_t *pThis, const uchar *ctrName, statsCtrType_t ctrType, int8_t flags, void *pCtr); - rsRetVal (*AddManagedCounter)(statsobj_t *pThis, const uchar *ctrName, statsCtrType_t ctrType, int8_t flags, void *pCtr, ctr_t **ref); - rsRetVal (*DestructCounter)(statsobj_t *pThis, ctr_t *ref); - rsRetVal (*DestructAllCounters)(statsobj_t *pThis); + rsRetVal (*AddManagedCounter)(statsobj_t *pThis, const uchar *ctrName, statsCtrType_t ctrType, int8_t flags, void *pCtr, ctr_t **ref, int8_t linked); + void (*AddPreCreatedCtr)(statsobj_t *pThis, ctr_t *ctr); + void (*DestructCounter)(statsobj_t *pThis, ctr_t *ref); + void (*DestructUnlinkedCounter)(ctr_t *ctr); + ctr_t* (*UnlinkAllCounters)(statsobj_t *pThis); rsRetVal (*EnableStats)(void); ENDinterface(statsobj) #define statsobjCURR_IF_VERSION 12 /* increment whenever you change the interface structure! */ @@ -173,7 +181,7 @@ void checkGoneAwaySenders(time_t); if(GatherStats) \ ATOMIC_INC_uint64(&ctr, &mut); -#define STATSCOUNTER_BUMP(ctr, mut, delta) \ +#define STATSCOUNTER_ADD(ctr, mut, delta) \ if(GatherStats) \ ATOMIC_ADD_uint64(&ctr, &mut, delta); diff --git a/tests/Makefile.am b/tests/Makefile.am index ae1ec46c8..43ac0a4bb 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -287,7 +287,8 @@ TESTS += \ dynstats-json.sh \ stats-cee.sh \ stats-json-es.sh \ - dynstats_reset_without_pstats_reset.sh + dynstats_reset_without_pstats_reset.sh \ + dynstats_prevent_premature_eviction.sh if HAVE_VALGRIND TESTS += \ dynstats-vg.sh \ @@ -295,7 +296,8 @@ TESTS += \ dynstats_overflow-vg.sh \ dynstats-json-vg.sh \ stats-json-vg.sh \ - stats-cee-vg.sh + stats-cee-vg.sh \ + dynstats_prevent_premature_eviction-vg.sh endif endif @@ -1061,6 +1063,8 @@ EXTRA_DIST= \ dynstats_reset-vg.sh \ dynstats.sh \ dynstats-vg.sh \ + dynstats_prevent_premature_eviction.sh \ + dynstats_prevent_premature_eviction-vg.sh \ testsuites/dynstats.conf \ testsuites/dynstats_ctr_reset.conf \ testsuites/dynstats_reset_without_pstats_reset.conf \ @@ -1275,3 +1279,4 @@ nettester_LDADD = $(SOL_LIBS) #rscript_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) #rscript_LDADD = $(RSRT_LIBS) $(ZLIB_LIBS) $(PTHREADS_LIBS) $(SOL_LIBS) #rscript_LDFLAGS = -export-dynamic + diff --git a/tests/diag.sh b/tests/diag.sh index a50acc1db..19a79f731 100755 --- a/tests/diag.sh +++ b/tests/diag.sh @@ -345,6 +345,18 @@ case $1 in . $srcdir/diag.sh error-exit 1 fi ;; + 'block-stats-flush') + echo blocking stats flush + echo "blockStatsReporting" | ./diagtalker || . $srcdir/diag.sh error-exit $? + ;; + 'await-stats-flush-after-block') + echo unblocking stats flush and waiting for it + echo "awaitStatsReport" | ./diagtalker || . $srcdir/diag.sh error-exit $? + ;; + 'allow-single-stats-flush-after-block-and-wait-for-it') + echo blocking stats flush + echo "awaitStatsReport block_again" | ./diagtalker || . $srcdir/diag.sh error-exit $? + ;; 'wait-for-stats-flush') echo "will wait for stats push" while [[ ! -f $2 ]]; do diff --git a/tests/diagtalker.c b/tests/diagtalker.c index f12ff9cd5..644ee468a 100644 --- a/tests/diagtalker.c +++ b/tests/diagtalker.c @@ -132,6 +132,9 @@ doProcessing() sendCmd(fd, line, len); waitRsp(fd, line, sizeof(line)); printf("imdiag[%d]: %s", targetPort, line); + if (strstr(line, "imdiag::error") != NULL) { + exit(1); + } } } diff --git a/tests/dynstats_overflow-vg.sh b/tests/dynstats_overflow-vg.sh index 2e1854e6c..a0ad9d806 100755 --- a/tests/dynstats_overflow-vg.sh +++ b/tests/dynstats_overflow-vg.sh @@ -5,30 +5,41 @@ echo =========================================================================== echo \[dynstats_overflow-vg.sh\]: test for gathering stats when metrics exceed provisioned capacity . $srcdir/diag.sh init . $srcdir/diag.sh startup-vg dynstats_overflow.conf -. $srcdir/diag.sh wait-for-dyn-stats-reset 'rsyslog.out.stats.log' 'msg_stats' +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' +. $srcdir/diag.sh block-stats-flush . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_more_0 -. $srcdir/diag.sh wait-queueempty -. $srcdir/diag.sh msleep 1000 . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_more_1 . $srcdir/diag.sh wait-queueempty -. $srcdir/diag.sh msleep 3100 #sleep above + this = 4 seconds, so metric-names reset should have happened -. $srcdir/diag.sh wait-queueempty +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it + . $srcdir/diag.sh first-column-sum-check 's/.*foo=\([0-9]\+\)/\1/g' 'foo=' 'rsyslog.out.stats.log' 5 . $srcdir/diag.sh first-column-sum-check 's/.*bar=\([0-9]\+\)/\1/g' 'bar=' 'rsyslog.out.stats.log' 1 . $srcdir/diag.sh first-column-sum-check 's/.*baz=\([0-9]\+\)/\1/g' 'baz=' 'rsyslog.out.stats.log' 2 + . $srcdir/diag.sh custom-assert-content-missing 'quux' 'rsyslog.out.stats.log' . $srcdir/diag.sh custom-assert-content-missing 'corge' 'rsyslog.out.stats.log' . $srcdir/diag.sh custom-assert-content-missing 'grault' 'rsyslog.out.stats.log' + +. $srcdir/diag.sh first-column-sum-check 's/.*new_metric_add=\([0-9]\+\)/\1/g' 'new_metric_add=' 'rsyslog.out.stats.log' 3 +. $srcdir/diag.sh first-column-sum-check 's/.*ops_overflow=\([0-9]\+\)/\1/g' 'ops_overflow=' 'rsyslog.out.stats.log' 5 +. $srcdir/diag.sh first-column-sum-check 's/.*no_metric=\([0-9]\+\)/\1/g' 'no_metric=' 'rsyslog.out.stats.log' 0 + +#ttl-expiry(2*ttl in worst case, ttl + delta in best) so metric-names reset should have happened +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it +. $srcdir/diag.sh await-stats-flush-after-block + +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' + +. $srcdir/diag.sh first-column-sum-check 's/.*metrics_purged=\([0-9]\+\)/\1/g' 'metrics_purged=' 'rsyslog.out.stats.log' 3 + rm $srcdir/rsyslog.out.stats.log . $srcdir/diag.sh issue-HUP #reopen stats file -. $srcdir/diag.sh wait-for-dyn-stats-reset 'rsyslog.out.stats.log' 'msg_stats' +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' +. $srcdir/diag.sh block-stats-flush . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_more_2 -. $srcdir/diag.sh msleep 4100 -echo doing shutdown -. $srcdir/diag.sh shutdown-when-empty -echo wait on shutdown -. $srcdir/diag.sh wait-shutdown-vg -. $srcdir/diag.sh check-exit-vg +. $srcdir/diag.sh wait-queueempty +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it + . $srcdir/diag.sh content-check "foo 001 0" . $srcdir/diag.sh content-check "bar 002 0" . $srcdir/diag.sh content-check "baz 003 0" @@ -52,5 +63,20 @@ echo wait on shutdown . $srcdir/diag.sh first-column-sum-check 's/.*grault=\([0-9]\+\)/\1/g' 'grault=' 'rsyslog.out.stats.log' 1 . $srcdir/diag.sh first-column-sum-check 's/.*quux=\([0-9]\+\)/\1/g' 'quux=' 'rsyslog.out.stats.log' 1 +. $srcdir/diag.sh first-column-sum-check 's/.*new_metric_add=\([0-9]\+\)/\1/g' 'new_metric_add=' 'rsyslog.out.stats.log' 3 +. $srcdir/diag.sh first-column-sum-check 's/.*ops_overflow=\([0-9]\+\)/\1/g' 'ops_overflow=' 'rsyslog.out.stats.log' 1 +. $srcdir/diag.sh first-column-sum-check 's/.*no_metric=\([0-9]\+\)/\1/g' 'no_metric=' 'rsyslog.out.stats.log' 0 + +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it +. $srcdir/diag.sh await-stats-flush-after-block + +echo doing shutdown +. $srcdir/diag.sh shutdown-when-empty +echo wait on shutdown +. $srcdir/diag.sh wait-shutdown-vg +. $srcdir/diag.sh check-exit-vg + +. $srcdir/diag.sh first-column-sum-check 's/.*metrics_purged=\([0-9]\+\)/\1/g' 'metrics_purged=' 'rsyslog.out.stats.log' 3 + . $srcdir/diag.sh custom-assert-content-missing 'foo' 'rsyslog.out.stats.log' . $srcdir/diag.sh exit diff --git a/tests/dynstats_overflow.sh b/tests/dynstats_overflow.sh index 40e87c819..736860af4 100755 --- a/tests/dynstats_overflow.sh +++ b/tests/dynstats_overflow.sh @@ -5,14 +5,12 @@ echo =========================================================================== echo \[dynstats_overflow.sh\]: test for gathering stats when metrics exceed provisioned capacity . $srcdir/diag.sh init . $srcdir/diag.sh startup dynstats_overflow.conf -. $srcdir/diag.sh wait-for-dyn-stats-reset 'rsyslog.out.stats.log' 'msg_stats' +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' +. $srcdir/diag.sh block-stats-flush . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_more_0 -. $srcdir/diag.sh wait-queueempty -. $srcdir/diag.sh msleep 800 . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_more_1 . $srcdir/diag.sh wait-queueempty -. $srcdir/diag.sh msleep 3100 #sleep above + this = 2 seconds, so metric-names reset should have happened -. $srcdir/diag.sh wait-queueempty +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it . $srcdir/diag.sh first-column-sum-check 's/.*foo=\([0-9]\+\)/\1/g' 'foo=' 'rsyslog.out.stats.log' 5 . $srcdir/diag.sh first-column-sum-check 's/.*bar=\([0-9]\+\)/\1/g' 'bar=' 'rsyslog.out.stats.log' 1 @@ -25,17 +23,23 @@ echo \[dynstats_overflow.sh\]: test for gathering stats when metrics exceed prov . $srcdir/diag.sh first-column-sum-check 's/.*new_metric_add=\([0-9]\+\)/\1/g' 'new_metric_add=' 'rsyslog.out.stats.log' 3 . $srcdir/diag.sh first-column-sum-check 's/.*ops_overflow=\([0-9]\+\)/\1/g' 'ops_overflow=' 'rsyslog.out.stats.log' 5 . $srcdir/diag.sh first-column-sum-check 's/.*no_metric=\([0-9]\+\)/\1/g' 'no_metric=' 'rsyslog.out.stats.log' 0 + +#ttl-expiry(2*ttl in worst case, ttl + delta in best) so metric-names reset should have happened +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it +. $srcdir/diag.sh await-stats-flush-after-block + +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' + . $srcdir/diag.sh first-column-sum-check 's/.*metrics_purged=\([0-9]\+\)/\1/g' 'metrics_purged=' 'rsyslog.out.stats.log' 3 rm $srcdir/rsyslog.out.stats.log . $srcdir/diag.sh issue-HUP #reopen stats file -. $srcdir/diag.sh wait-for-dyn-stats-reset 'rsyslog.out.stats.log' 'msg_stats' +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' +. $srcdir/diag.sh block-stats-flush . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_more_2 -. $srcdir/diag.sh msleep 4100 -echo doing shutdown -. $srcdir/diag.sh shutdown-when-empty -echo wait on shutdown -. $srcdir/diag.sh wait-shutdown +. $srcdir/diag.sh wait-queueempty +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it + . $srcdir/diag.sh content-check "foo 001 0" . $srcdir/diag.sh content-check "bar 002 0" . $srcdir/diag.sh content-check "baz 003 0" @@ -62,6 +66,15 @@ echo wait on shutdown . $srcdir/diag.sh first-column-sum-check 's/.*new_metric_add=\([0-9]\+\)/\1/g' 'new_metric_add=' 'rsyslog.out.stats.log' 3 . $srcdir/diag.sh first-column-sum-check 's/.*ops_overflow=\([0-9]\+\)/\1/g' 'ops_overflow=' 'rsyslog.out.stats.log' 1 . $srcdir/diag.sh first-column-sum-check 's/.*no_metric=\([0-9]\+\)/\1/g' 'no_metric=' 'rsyslog.out.stats.log' 0 + +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it +. $srcdir/diag.sh await-stats-flush-after-block + +echo doing shutdown +. $srcdir/diag.sh shutdown-when-empty +echo wait on shutdown +. $srcdir/diag.sh wait-shutdown + . $srcdir/diag.sh first-column-sum-check 's/.*metrics_purged=\([0-9]\+\)/\1/g' 'metrics_purged=' 'rsyslog.out.stats.log' 3 . $srcdir/diag.sh custom-assert-content-missing 'foo' 'rsyslog.out.stats.log' diff --git a/tests/dynstats_prevent_premature_eviction-vg.sh b/tests/dynstats_prevent_premature_eviction-vg.sh new file mode 100755 index 000000000..3eba5f91c --- /dev/null +++ b/tests/dynstats_prevent_premature_eviction-vg.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# added 2016-04-13 by singh.janmejay +# This file is part of the rsyslog project, released under ASL 2.0 +echo =============================================================================== +echo \[dynstats_prevent_premature_eviction-vg.sh\]: test for ensuring metrics are not evicted before unused-ttl with valgrind +. $srcdir/diag.sh init +. $srcdir/diag.sh startup-vg dynstats_reset.conf +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' +. $srcdir/diag.sh block-stats-flush +. $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_1 +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it +. $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_2 +. $srcdir/diag.sh allow-single-stats-flush-after-block-and-wait-for-it +. $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_3 +. $srcdir/diag.sh await-stats-flush-after-block +. $srcdir/diag.sh wait-queueempty +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' +. $srcdir/diag.sh content-check "foo 001 0" +. $srcdir/diag.sh content-check "foo 006 0" +echo doing shutdown +. $srcdir/diag.sh shutdown-when-empty +echo wait on shutdown +. $srcdir/diag.sh wait-shutdown-vg +. $srcdir/diag.sh check-exit-vg + # because dyn-accumulators for existing metrics were posted-to under a second, they should not have been evicted +. $srcdir/diag.sh custom-content-check 'baz=2' 'rsyslog.out.stats.log' +. $srcdir/diag.sh custom-content-check 'bar=1' 'rsyslog.out.stats.log' +. $srcdir/diag.sh custom-content-check 'foo=3' 'rsyslog.out.stats.log' +# sum is high because accumulators were never reset, and we expect them to last specific number of cycles(when we posted before ttl expiry) +. $srcdir/diag.sh first-column-sum-check 's/.*foo=\([0-9]\+\)/\1/g' 'foo=' 'rsyslog.out.stats.log' 6 +. $srcdir/diag.sh first-column-sum-check 's/.*bar=\([0-9]\+\)/\1/g' 'bar=' 'rsyslog.out.stats.log' 1 +. $srcdir/diag.sh first-column-sum-check 's/.*baz=\([0-9]\+\)/\1/g' 'baz=' 'rsyslog.out.stats.log' 3 +. $srcdir/diag.sh first-column-sum-check 's/.*new_metric_add=\([0-9]\+\)/\1/g' 'new_metric_add=' 'rsyslog.out.stats.log' 3 +. $srcdir/diag.sh first-column-sum-check 's/.*ops_overflow=\([0-9]\+\)/\1/g' 'ops_overflow=' 'rsyslog.out.stats.log' 0 +. $srcdir/diag.sh first-column-sum-check 's/.*no_metric=\([0-9]\+\)/\1/g' 'no_metric=' 'rsyslog.out.stats.log' 0 +. $srcdir/diag.sh exit diff --git a/tests/dynstats_prevent_premature_eviction.sh b/tests/dynstats_prevent_premature_eviction.sh new file mode 100755 index 000000000..cec539078 --- /dev/null +++ b/tests/dynstats_prevent_premature_eviction.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# added 2016-04-13 by singh.janmejay +# This file is part of the rsyslog project, released under ASL 2.0 +echo =============================================================================== +echo \[dynstats_prevent_premature_eviction.sh\]: test for ensuring metrics are not evicted before unused-ttl +. $srcdir/diag.sh init +. $srcdir/diag.sh startup dynstats_reset.conf +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' +. $srcdir/diag.sh msleep 800 +. $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_1 +. $srcdir/diag.sh msleep 400 +. $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_2 +. $srcdir/diag.sh msleep 900 +. $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_3 +. $srcdir/diag.sh wait-queueempty +. $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' +. $srcdir/diag.sh content-check "foo 001 0" +. $srcdir/diag.sh content-check "foo 006 0" +echo doing shutdown +. $srcdir/diag.sh shutdown-when-empty +echo wait on shutdown +. $srcdir/diag.sh wait-shutdown + # because dyn-accumulators for existing metrics were posted-to under a second, they should not have been evicted +. $srcdir/diag.sh custom-content-check 'baz=2' 'rsyslog.out.stats.log' +. $srcdir/diag.sh custom-content-check 'bar=1' 'rsyslog.out.stats.log' +. $srcdir/diag.sh custom-content-check 'foo=3' 'rsyslog.out.stats.log' +# sum is high because accumulators were never reset, and we expect them to last specific number of cycles(when we posted before ttl expiry) +. $srcdir/diag.sh first-column-sum-check 's/.*foo=\([0-9]\+\)/\1/g' 'foo=' 'rsyslog.out.stats.log' 6 +. $srcdir/diag.sh first-column-sum-check 's/.*bar=\([0-9]\+\)/\1/g' 'bar=' 'rsyslog.out.stats.log' 1 +. $srcdir/diag.sh first-column-sum-check 's/.*baz=\([0-9]\+\)/\1/g' 'baz=' 'rsyslog.out.stats.log' 3 +. $srcdir/diag.sh first-column-sum-check 's/.*new_metric_add=\([0-9]\+\)/\1/g' 'new_metric_add=' 'rsyslog.out.stats.log' 3 +. $srcdir/diag.sh first-column-sum-check 's/.*ops_overflow=\([0-9]\+\)/\1/g' 'ops_overflow=' 'rsyslog.out.stats.log' 0 +. $srcdir/diag.sh first-column-sum-check 's/.*no_metric=\([0-9]\+\)/\1/g' 'no_metric=' 'rsyslog.out.stats.log' 0 +. $srcdir/diag.sh exit diff --git a/tests/dynstats_reset-vg.sh b/tests/dynstats_reset-vg.sh index 9303afbee..391260b08 100755 --- a/tests/dynstats_reset-vg.sh +++ b/tests/dynstats_reset-vg.sh @@ -7,11 +7,11 @@ echo \[dynstats_reset-vg.sh\]: test for gathering stats with a known-dyn-metrics . $srcdir/diag.sh startup-vg dynstats_reset.conf . $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_1 -. $srcdir/diag.sh msleep 1100 +. $srcdir/diag.sh msleep 2100 #one second for unused-metrics to be kept under observation, another for them to be cleared off . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_2 -. $srcdir/diag.sh msleep 1100 +. $srcdir/diag.sh msleep 2100 . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_3 -. $srcdir/diag.sh msleep 1100 +. $srcdir/diag.sh msleep 2100 . $srcdir/diag.sh wait-queueempty . $srcdir/diag.sh content-check "foo 001 0" . $srcdir/diag.sh content-check "bar 002 0" diff --git a/tests/dynstats_reset.sh b/tests/dynstats_reset.sh index 708b4d7c5..84c43ee5f 100755 --- a/tests/dynstats_reset.sh +++ b/tests/dynstats_reset.sh @@ -7,11 +7,11 @@ echo \[dynstats_reset.sh\]: test for gathering stats with a known-dyn-metrics re . $srcdir/diag.sh startup dynstats_reset.conf . $srcdir/diag.sh wait-for-stats-flush 'rsyslog.out.stats.log' . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_1 -. $srcdir/diag.sh msleep 1100 +. $srcdir/diag.sh msleep 2100 . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_2 -. $srcdir/diag.sh msleep 1100 +. $srcdir/diag.sh msleep 2100 . $srcdir/diag.sh injectmsg-litteral $srcdir/testsuites/dynstats_input_3 -. $srcdir/diag.sh msleep 1100 +. $srcdir/diag.sh msleep 2100 . $srcdir/diag.sh wait-queueempty . $srcdir/diag.sh content-check "foo 001 0" . $srcdir/diag.sh content-check "bar 002 0" diff --git a/tests/testsuites/dynstats_overflow.conf b/tests/testsuites/dynstats_overflow.conf index af8940dea..8ff343fe6 100644 --- a/tests/testsuites/dynstats_overflow.conf +++ b/tests/testsuites/dynstats_overflow.conf @@ -4,11 +4,11 @@ ruleset(name="stats") { action(type="omfile" file="./rsyslog.out.stats.log") } -module(load="../plugins/impstats/.libs/impstats" interval="1" severity="7" resetCounters="on" Ruleset="stats" bracketing="on") +module(load="../plugins/impstats/.libs/impstats" interval="2" severity="7" resetCounters="on" Ruleset="stats" bracketing="on") template(name="outfmt" type="string" string="%msg% %$.increment_successful%\n") -dyn_stats(name="msg_stats" unusedMetricLife="4" maxCardinality="3") +dyn_stats(name="msg_stats" unusedMetricLife="1" maxCardinality="3") set $.msg_prefix = field($msg, 32, 1); diff --git a/tests/testsuites/dynstats_reset.conf b/tests/testsuites/dynstats_reset.conf index 9ae36e67b..73fc50e2f 100644 --- a/tests/testsuites/dynstats_reset.conf +++ b/tests/testsuites/dynstats_reset.conf @@ -8,7 +8,7 @@ module(load="../plugins/impstats/.libs/impstats" interval="1" severity="7" reset template(name="outfmt" type="string" string="%msg% %$.increment_successful%\n") -dyn_stats(name="msg_stats" unusedMetricLife="1") +dyn_stats(name="msg_stats" unusedMetricLife="1" resettable="off") set $.msg_prefix = field($msg, 32, 1);