rsyslog/tests/omprog-transactions-failed-messages.sh
Andre Lorbach 19ae11b869 Fix transaction suspension handling for issue #2420
- action.c: Add iSuspended flag to prevent infinite loops when transactions
  are suspended multiple times. Retry on first suspension
  and abort with RS_RET_SUSPENDED on subsequent suspensions.

- tests/omprog-transactions-failed-messages.sh: Remove TODO comment and
  workaround code related to issue #2420 (deferred messages within
  transactions not being retried), as the underlying issue appears to
  be resolved.

- tests/omprog-feedback-timeout.sh: Update expected output to reflect
  improved transaction handling behavior. The test now expects additional
  message processing cycles and proper timeout handling when the omprog
  action is suspended and restarted.

- tests/omprog-feedback.sh: Make robust against timing variations from
  new action.c retry logic by replacing exact sequence matching with
  pattern-based validation to ensure cross-system compatibility.

- tests/omhttp-retry-timeout.sh: Optimize test parameters for better
  reliability by reducing message count from 10000 to 5000, adding
  sequence check options, and reducing queue batch size from 2048 to 500
  to prevent test timeouts and improve stability.

- omhttp-batch-fail-with-400.sh test: resolve queue growth issue with
  HTTP 400 errors. The test was experiencing a queue growth issue where
  the queue size was increasing. This was caused by the omhttp module
  incorrectly treating HTTP 400 errors as retriable when they should be
  treated as permanent failures.
  FIX: Added httpretrycodes=["500", "502", "503", "504"] configuration.
  This explicitly specifies that only 5xx server errors should be retried.
  HTTP 400 errors are now properly treated as permanent failures.

Some tests needed to be adapted, because they expected an "exactly once"
paradigm, which the fixed bug seemed to provide in some cases (but not
reliably). Actually, rsyslog guarantees "at least once", so duplicates
can occur and are typical if transaction-like logic is used with
non-transactional outputs.

This addresses the transaction suspension edge case and cleans up
temporary workaround code that is no longer needed. The test updates
ensure that the improved transaction handling behavior is properly
validated across different scenarios and that tests correctly reflect
rsyslog's actual delivery semantics.

closes https://github.com/rsyslog/rsyslog/issues/2420
2025-08-15 14:28:34 +02:00

147 lines
4.3 KiB
Bash
Executable File

#!/bin/bash
# This file is part of the rsyslog project, released under ASL 2.0
# This test tests omprog with the confirmMessages=on and useTransactions=on
# parameters, with the external program returning an error on certain
# messages.
. ${srcdir:=.}/diag.sh init
generate_conf
add_conf '
module(load="../plugins/omprog/.libs/omprog")
template(name="outfmt" type="string" string="%msg%\n")
:msg, contains, "msgnum:" {
action(
type="omprog"
binary=`echo $srcdir/testsuites/omprog-transactions-bin.sh --failed_messages`
template="outfmt"
name="omprog_action"
queue.type="Direct" # the default; facilitates sync with the child process
queue.dequeueBatchSize="6"
confirmMessages="on"
useTransactions="on"
action.resumeRetryCount="10"
action.resumeInterval="1"
)
}
'
startup
injectmsg 0 10
shutdown_when_empty
wait_shutdown
# Since the transaction boundaries are not deterministic, we cannot check for
# an exact expected output. We must check the output programmatically.
transaction_state="NONE"
status_expected=true
messages_to_commit=()
messages_processed=()
line_num=1
error=
while IFS= read -r line; do
if [[ $status_expected == true ]]; then
case "$transaction_state" in
"NONE")
if [[ "$line" != "<= OK" ]]; then
error="expecting an OK status from script"
break
fi
;;
"STARTED")
if [[ "$line" != "<= OK" ]]; then
error="expecting an OK status from script"
break
fi
transaction_state="ACTIVE"
;;
"ACTIVE")
if [[ "$line" == "<= Error: could not process log message" ]]; then
messages_to_commit=()
transaction_state="NONE"
elif [[ "$line" != "<= DEFER_COMMIT" ]]; then
error="expecting a DEFER_COMMIT status from script"
break
fi
;;
"COMMITTED")
if [[ "$line" != "<= OK" ]]; then
error="expecting an OK status from script"
break
fi
messages_processed+=("${messages_to_commit[@]}")
messages_to_commit=()
transaction_state="NONE"
;;
esac
status_expected=false;
else
if [[ "$line" == "=> BEGIN TRANSACTION" ]]; then
if [[ "$transaction_state" != "NONE" ]]; then
error="unexpected transaction start"
break
fi
transaction_state="STARTED"
elif [[ "$line" == "=> COMMIT TRANSACTION" ]]; then
if [[ "$transaction_state" != "ACTIVE" ]]; then
error="unexpected transaction commit"
break
fi
transaction_state="COMMITTED"
else
if [[ "$transaction_state" != "ACTIVE" ]]; then
error="unexpected message outside a transaction"
break
fi
if [[ "$line" != "=> msgnum:"* ]]; then
error="unexpected message contents"
break
fi
prefix_to_remove="=> "
messages_to_commit+=("${line#$prefix_to_remove}")
fi
status_expected=true;
fi
((line_num++))
done < $RSYSLOG_OUT_LOG
if [[ -z "$error" && "$transaction_state" != "NONE" ]]; then
error="unexpected end of file (transaction state: $transaction_state)"
fi
if [[ -n "$error" ]]; then
echo "$RSYSLOG_OUT_LOG: line $line_num: $error"
cat $RSYSLOG_OUT_LOG
error_exit 1
fi
# Since the order in which failed messages are retried by rsyslog is not
# deterministic, we sort the processed messages before checking them.
IFS=$'\n' messages_sorted=($(sort <<<"${messages_processed[*]}"))
unset IFS
expected_messages=(
"msgnum:00000000:"
"msgnum:00000001:"
"msgnum:00000002:"
"msgnum:00000003:"
"msgnum:00000004:"
"msgnum:00000005:"
"msgnum:00000006:"
"msgnum:00000007:"
"msgnum:00000008:"
"msgnum:00000009:"
)
if [[ "${messages_sorted[*]}" != "${expected_messages[*]}" ]]; then
echo "unexpected set of processed messages:"
printf '%s\n' "${messages_processed[@]}"
echo "contents of $RSYSLOG_OUT_LOG:"
cat $RSYSLOG_OUT_LOG
error_exit 1
fi
exit_test