mirror of
https://github.com/rsyslog/rsyslog.git
synced 2026-06-19 10:32:55 +02:00
Merge pull request #7162 from rgerhards/codex/i5693-omhttp-retry
omhttp: avoid retry-ruleset self-stall
This commit is contained in:
commit
50bdea0958
@ -1077,7 +1077,11 @@ static rsRetVal queueBatchOnRetryRuleset(wrkrInstanceData_t *const pWrkrData, in
|
|||||||
|
|
||||||
// Construct the message object
|
// Construct the message object
|
||||||
CHKiRet(msgConstruct(&pMsg));
|
CHKiRet(msgConstruct(&pMsg));
|
||||||
CHKiRet(MsgSetFlowControlType(pMsg, eFLOWCTL_FULL_DELAY));
|
/* These messages are generated by an action worker. In recursive
|
||||||
|
* retry-ruleset setups, FULL_DELAY can make that worker block on the
|
||||||
|
* same queue it must drain. LIGHT_DELAY still applies short
|
||||||
|
* backpressure without waiting indefinitely at the full-delay mark. */
|
||||||
|
CHKiRet(MsgSetFlowControlType(pMsg, eFLOWCTL_LIGHT_DELAY));
|
||||||
MsgSetInputName(pMsg, pInputName);
|
MsgSetInputName(pMsg, pInputName);
|
||||||
MsgSetRawMsg(pMsg, (const char *)msgData, ustrlen(msgData));
|
MsgSetRawMsg(pMsg, (const char *)msgData, ustrlen(msgData));
|
||||||
MsgSetMSGoffs(pMsg, 0); // No header
|
MsgSetMSGoffs(pMsg, 0); // No header
|
||||||
|
|||||||
@ -959,6 +959,7 @@ TESTS_OMHTTP = \
|
|||||||
omhttp-profile-loki.sh \
|
omhttp-profile-loki.sh \
|
||||||
omhttp-batch-newline.sh \
|
omhttp-batch-newline.sh \
|
||||||
omhttp-retry.sh \
|
omhttp-retry.sh \
|
||||||
|
omhttp-retry-ruleset-recovery.sh \
|
||||||
omhttp-retry-timeout.sh \
|
omhttp-retry-timeout.sh \
|
||||||
wti-shutdown-saveonshutdown-omhttp.sh \
|
wti-shutdown-saveonshutdown-omhttp.sh \
|
||||||
omhttp-httpheaderkey.sh \
|
omhttp-httpheaderkey.sh \
|
||||||
|
|||||||
81
tests/omhttp-retry-ruleset-recovery.sh
Executable file
81
tests/omhttp-retry-ruleset-recovery.sh
Executable file
@ -0,0 +1,81 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# add 2026-05-31 by OpenAI Codex
|
||||||
|
# This is a regression test for issue #5693. It verifies that an omhttp
|
||||||
|
# action using retry.ruleset can recover after a short HTTP outage instead of
|
||||||
|
# stalling behind retry-ruleset queue flow-control watermarks. The helper
|
||||||
|
# fails requests for one second starting with the first POST, then accepts
|
||||||
|
# traffic again. Success is proven by the normal sequence oracle over all
|
||||||
|
# generated messages after rsyslog drains and shuts down. TEST_MAX_RUNTIME is
|
||||||
|
# intentionally bounded at 120s so the historical stall fails quickly while
|
||||||
|
# leaving enough room for CI scheduling and retry backoff jitter.
|
||||||
|
. ${srcdir:=.}/diag.sh init
|
||||||
|
|
||||||
|
export TEST_MAX_RUNTIME=${TEST_MAX_RUNTIME:-120}
|
||||||
|
NUMMESSAGES=200
|
||||||
|
|
||||||
|
omhttp_start_server 0 --fail-interval-start 0 --fail-interval-stop 1
|
||||||
|
# shellcheck disable=SC2154 # set by omhttp_start_server in diag.sh
|
||||||
|
port="$omhttp_server_lstnport"
|
||||||
|
|
||||||
|
fetch_omhttp_data() {
|
||||||
|
omhttp_get_data "$port" my/endpoint kafkarest
|
||||||
|
}
|
||||||
|
|
||||||
|
generate_conf
|
||||||
|
add_conf '
|
||||||
|
module(load="../contrib/omhttp/.libs/omhttp")
|
||||||
|
|
||||||
|
template(name="tpl" type="string"
|
||||||
|
string="{\"msgnum\":\"%msg:F,58:2%\"}")
|
||||||
|
|
||||||
|
template(name="tpl_retry" type="string" string="%msg%")
|
||||||
|
|
||||||
|
ruleset(name="rs_q_default" queue.type="LinkedList" queue.size="500") {
|
||||||
|
action(type="omhttp"
|
||||||
|
name="default_omhttp"
|
||||||
|
server="localhost"
|
||||||
|
serverport="'$port'"
|
||||||
|
restpath="my/endpoint"
|
||||||
|
template="tpl"
|
||||||
|
batch="on"
|
||||||
|
batch.format="kafkarest"
|
||||||
|
batch.maxsize="10"
|
||||||
|
retry="on"
|
||||||
|
retry.ruleset="rs_q_retry"
|
||||||
|
usehttps="off"
|
||||||
|
action.resumeRetryCount="-1"
|
||||||
|
action.resumeInterval="1"
|
||||||
|
action.resumeIntervalMax="1"
|
||||||
|
checkpath="ping")
|
||||||
|
}
|
||||||
|
|
||||||
|
ruleset(name="rs_q_retry" queue.type="LinkedList" queue.size="5000" queue.lightdelaymark="100" queue.fulldelaymark="150" queue.timeoutenqueue="30000") {
|
||||||
|
action(type="omhttp"
|
||||||
|
name="retry_omhttp"
|
||||||
|
server="localhost"
|
||||||
|
serverport="'$port'"
|
||||||
|
restpath="my/endpoint"
|
||||||
|
template="tpl_retry"
|
||||||
|
batch="on"
|
||||||
|
batch.format="kafkarest"
|
||||||
|
batch.maxsize="10"
|
||||||
|
usehttps="off"
|
||||||
|
action.resumeRetryCount="-1"
|
||||||
|
action.resumeInterval="1"
|
||||||
|
action.resumeIntervalMax="1"
|
||||||
|
checkpath="ping")
|
||||||
|
}
|
||||||
|
|
||||||
|
if $msg contains "msgnum:" then
|
||||||
|
call rs_q_default
|
||||||
|
'
|
||||||
|
startup
|
||||||
|
injectmsg 0 "$NUMMESSAGES"
|
||||||
|
|
||||||
|
PRE_SEQ_CHECK_FUNC=fetch_omhttp_data wait_seq_check 0 $((NUMMESSAGES - 1)) -d
|
||||||
|
shutdown_when_empty
|
||||||
|
wait_shutdown
|
||||||
|
fetch_omhttp_data
|
||||||
|
omhttp_stop_server
|
||||||
|
seq_check 0 $((NUMMESSAGES - 1)) -d
|
||||||
|
exit_test
|
||||||
@ -7,6 +7,8 @@ import base64
|
|||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
now = getattr(time, 'monotonic', time.time)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer # Python 2
|
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer # Python 2
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -58,6 +60,20 @@ class MyHandler(BaseHTTPRequestHandler):
|
|||||||
if not self.validate_auth():
|
if not self.validate_auth():
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if metadata['fail_interval_start'] != -1 and metadata['fail_interval_stop'] != -1:
|
||||||
|
if metadata['fail_interval_base_time'] is None:
|
||||||
|
metadata['fail_interval_base_time'] = now()
|
||||||
|
time_since_start = now() - metadata['fail_interval_base_time']
|
||||||
|
if metadata['fail_interval_start'] <= time_since_start <= metadata['fail_interval_stop']:
|
||||||
|
if metadata['fail_with_delay_secs']:
|
||||||
|
print("sleeping for: {0}".format(metadata['fail_with_delay_secs']))
|
||||||
|
time.sleep(metadata['fail_with_delay_secs'])
|
||||||
|
code = metadata['fail_with'] if metadata['fail_with'] else 500
|
||||||
|
self.send_response(code)
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(b'INTERVAL FAILURE')
|
||||||
|
return
|
||||||
|
|
||||||
if metadata['fail_with_400_after'] != -1 and metadata['posts'] > metadata['fail_with_400_after']:
|
if metadata['fail_with_400_after'] != -1 and metadata['posts'] > metadata['fail_with_400_after']:
|
||||||
if metadata['fail_with_delay_secs']:
|
if metadata['fail_with_delay_secs']:
|
||||||
print("sleeping for: {0}".format(metadata['fail_with_delay_secs']))
|
print("sleeping for: {0}".format(metadata['fail_with_delay_secs']))
|
||||||
@ -136,6 +152,10 @@ if __name__ == '__main__':
|
|||||||
parser.add_argument('--fail-with-401-or-403-after', action='store', type=int,
|
parser.add_argument('--fail-with-401-or-403-after', action='store', type=int,
|
||||||
default=-1, help='fail with 401 or 403 after n posts')
|
default=-1, help='fail with 401 or 403 after n posts')
|
||||||
parser.add_argument('--fail-with-delay-secs', action='store', type=int, default=0, help='fail with n secs of delay')
|
parser.add_argument('--fail-with-delay-secs', action='store', type=int, default=0, help='fail with n secs of delay')
|
||||||
|
parser.add_argument('--fail-interval-start', action='store', type=int,
|
||||||
|
default=-1, help='start failing after n seconds from the first POST')
|
||||||
|
parser.add_argument('--fail-interval-stop', action='store', type=int,
|
||||||
|
default=-1, help='stop failing after n seconds from the first POST')
|
||||||
parser.add_argument('--decompress', action='store_true', default=False, help='decompress posted data')
|
parser.add_argument('--decompress', action='store_true', default=False, help='decompress posted data')
|
||||||
parser.add_argument('--userpwd', action='store', default='', help='only accept this user:password combination')
|
parser.add_argument('--userpwd', action='store', default='', help='only accept this user:password combination')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@ -145,6 +165,9 @@ if __name__ == '__main__':
|
|||||||
metadata['fail_with_400_after'] = args.fail_with_400_after
|
metadata['fail_with_400_after'] = args.fail_with_400_after
|
||||||
metadata['fail_with_401_or_403_after'] = args.fail_with_401_or_403_after
|
metadata['fail_with_401_or_403_after'] = args.fail_with_401_or_403_after
|
||||||
metadata['fail_with_delay_secs'] = args.fail_with_delay_secs
|
metadata['fail_with_delay_secs'] = args.fail_with_delay_secs
|
||||||
|
metadata['fail_interval_start'] = args.fail_interval_start
|
||||||
|
metadata['fail_interval_stop'] = args.fail_interval_stop
|
||||||
|
metadata['fail_interval_base_time'] = None
|
||||||
metadata['decompress'] = args.decompress
|
metadata['decompress'] = args.decompress
|
||||||
metadata['userpwd'] = args.userpwd
|
metadata['userpwd'] = args.userpwd
|
||||||
server = HTTPServer((args.interface, args.port), MyHandler)
|
server = HTTPServer((args.interface, args.port), MyHandler)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user