rsyslog/.codex/pre_commit_format_gate.sh
Rainer Gerhards 9d384cfdbb .codex: restore wrapped git commit hook matching
Why: recent Codex CLI payloads now wrap shell commands as
`bash -lc '...'`, so the repo-local pre-commit gate stopped
recognizing `git commit` invocations and no longer ran.

Impact: the hook now intercepts both direct and shell-wrapped
`git commit` commands before the commit proceeds.

Before/After: wrapped `git commit` payloads bypassed the gate;
now they are detected and processed like direct invocations.

Technical Overview:
Teach the hook parser to unwrap `bash`, `sh`, and `zsh`
launchers that pass commands via `-c` or `-lc`.

Recurse into the nested shell command and reuse the existing
`git commit` matcher on the extracted simple commands.

Document the wrapped-command form in `.codex/README.md` so the
expected runtime shape stays visible next to the hook setup.

Validation: `bash -n .codex/pre_commit_format_gate.sh`; manual
PreToolUse payload smoke tests for direct and wrapped
`git commit --help` commands.

With the help of AI-Agents: Codex
2026-04-23 08:47:25 +02:00

412 lines
9.5 KiB
Bash
Executable File

#!/usr/bin/env bash
# Hook entry point: reads a JSON hook payload from stdin and decides whether
# the pre-commit formatting/policy gate has to run for it.
set -euo pipefail

# The whole payload is slurped once; it is handed to the Python classifier
# through the environment further down.
payload="$(cat)"

# Nothing on stdin means there is nothing to inspect: allow silently.
[[ -n "${payload}" ]] || exit 0
should_gate="$(
PAYLOAD="$payload" python3 <<'PY'
import json
import os
import posixpath
import re
import shlex
import sys
def split_simple_commands(command: str) -> list[list[str]]:
    """Split a shell command line into simple commands.

    The text is tokenised with ``shlex`` in POSIX mode. Runs of the control
    characters ``; & | ( )`` and unquoted newlines end the current simple
    command; every other token becomes a word of it. ``#`` is not treated as
    a comment starter.

    Args:
        command: Raw shell command text, possibly spanning several lines.

    Returns:
        One list of words per simple command, in source order. Empty
        commands (e.g. between ``;;``) are dropped.

    Notes:
        * This is a lexical approximation, not a shell parser: here-document
          bodies are tokenised like ordinary command lines.
        * If the lexer hits an unbalanced quote or a dangling escape, the
          commands recognised up to that point are returned instead of
          raising, so an apostrophe in a later line (for example inside a
          here-document commit message) does not hide an earlier command.
    """
    separators = ";&|()\n"
    lexer = shlex.shlex(command, posix=True, punctuation_chars=separators)
    lexer.whitespace_split = True
    lexer.commenters = ""
    # A newline terminates a command in shell. shlex treats it as plain
    # whitespace by default, which would glue "git add .\ngit commit" into a
    # single command; removing it from the whitespace set lets it be lexed
    # as punctuation instead.
    lexer.whitespace = lexer.whitespace.replace("\n", "")

    commands = []
    current = []
    try:
        for token in lexer:
            if re.fullmatch(r"[;&|()\n]+", token):
                if current:
                    commands.append(current)
                    current = []
                continue
            current.append(token)
    except ValueError:
        # Malformed tail: keep every complete word parsed so far.
        pass
    if current:
        commands.append(current)
    return commands
def strip_prefixes(words: list[str]) -> list[str]:
    """Drop leading wrappers so the real command name comes first.

    Skipped prefixes are:
      * the wrapper words ``sudo``, ``command``, ``builtin``, ``noglob`` and
        ``time``;
      * ``env`` together with its options and ``NAME=value`` arguments;
      * leading ``NAME=value`` variable assignments.

    Args:
        words: Words of one simple command.

    Returns:
        The tail of ``words`` starting at the first word that is not a
        recognised prefix (possibly empty).
    """
    # DOTALL: a quoted value may contain a newline, which "." would not match.
    assignment_re = re.compile(r"[A-Za-z_][A-Za-z0-9_]*=.*", re.DOTALL)
    wrapper_words = {"sudo", "command", "builtin", "noglob", "time"}
    # env options whose value is passed as a separate word.
    env_options_with_value = {"-u", "--unset", "-C", "--chdir"}

    i = 0
    while i < len(words):
        token = words[i]
        if token in wrapper_words:
            i += 1
            continue
        if token == "env":
            i += 1
            while i < len(words):
                arg = words[i]
                if arg == "--":
                    i += 1
                    break
                if arg in env_options_with_value:
                    i += 2
                elif arg.startswith("-") or assignment_re.fullmatch(arg):
                    i += 1
                else:
                    break
            continue
        if assignment_re.fullmatch(token):
            i += 1
            continue
        break
    return words[i:]
def unwrap_shell_command(words: list[str]) -> list[str] | None:
    """Extract the command string from a ``bash``/``sh``/``zsh`` ``-c`` call.

    Leading prefixes are removed with ``strip_prefixes`` first. The shell is
    recognised by the basename of the first word, so ``/bin/bash`` matches.

    Option handling:
      * a single-dash cluster containing ``c`` (``-c``, ``-lc``, ``-ec``)
        selects command-string mode;
      * ``-o``/``+o`` (and ``-O``/``+O`` for bash) consume the following
        word as their option name, including inside a cluster (``-eo``);
      * long options are skipped and never count as ``-c`` even if their
        name contains the letter ``c`` (``--norc``, ``--rcfile``);
        ``--rcfile`` and ``--init-file`` also consume their file argument;
      * ``--`` or a non-option word before any ``-c`` means the shell runs a
        script file or stdin, not a command string.

    Args:
        words: Words of one simple command.

    Returns:
        The words following the ``-c`` option (the first one is the command
        string), or ``None`` when the command is not a shell ``-c``
        invocation or the command string is missing.
    """
    words = strip_prefixes(words)
    if not words:
        return None
    shell_name = posixpath.basename(words[0])
    if shell_name not in {"bash", "sh", "zsh"}:
        return None

    long_options_with_value = {"--rcfile", "--init-file"}
    # Short flags whose argument is the next word; -O is bash-specific.
    value_flags = "oO" if shell_name == "bash" else "o"

    i = 1
    while i < len(words):
        token = words[i]
        if token == "--":
            return None
        if token.startswith("--"):
            i += 2 if token in long_options_with_value else 1
            continue
        if token.startswith(("-", "+")):
            flags = token[1:]
            consumed = 1 + sum(flags.count(flag) for flag in value_flags)
            if token[0] == "-" and "c" in flags:
                return words[i + consumed:] or None
            i += consumed
            continue
        return None
    return None
def is_git_commit(words: list[str]) -> bool:
    """Tell whether a simple command is a ``git commit`` invocation.

    Leading prefixes are removed with ``strip_prefixes``. The executable is
    matched by basename, so ``/usr/bin/git commit`` is recognised just like
    ``git commit``. Global git options placed before the subcommand are
    skipped; options that take their value as a separate word also skip
    that value.

    Args:
        words: Words of one simple command.

    Returns:
        ``True`` when the first non-option word after ``git`` is ``commit``;
        ``False`` otherwise, including when ``--`` appears first.
    """
    words = strip_prefixes(words)
    if not words or posixpath.basename(words[0]) != "git":
        return False

    # Global options whose value is the following word. The "--opt=value"
    # spellings are single words and fall under the generic option skip.
    options_with_value = {
        "-c",
        "-C",
        "--git-dir",
        "--work-tree",
        "--namespace",
        "--super-prefix",
        "--config-env",
    }

    i = 1
    while i < len(words):
        token = words[i]
        if token == "commit":
            return True
        if token == "--" or not token.startswith("-"):
            # "--" or some other subcommand: not a commit.
            return False
        i += 2 if token in options_with_value else 1
    return False
def contains_git_commit(words: list[str]) -> bool:
    """Detect ``git commit`` in a simple command, looking through shell wrappers.

    The command matches when it is a ``git commit`` itself, or when it is a
    shell ``-c`` invocation whose command string, split into simple
    commands, contains one (checked recursively).

    Args:
        words: Words of one simple command.

    Returns:
        ``True`` if a ``git commit`` was found; ``False`` otherwise,
        including when the nested command string is empty or cannot be
        tokenised.
    """
    if is_git_commit(words):
        return True

    inner_words = unwrap_shell_command(words)
    script = inner_words[0] if inner_words else None
    if not isinstance(script, str) or not script.strip():
        return False

    try:
        nested_commands = split_simple_commands(script)
    except ValueError:
        return False

    for nested in nested_commands:
        if contains_git_commit(nested):
            return True
    return False
# Driver: print "yes" when the hook payload describes a Bash tool call that
# runs `git commit`. Every other case, including malformed input, exits 0
# without output so the caller skips the gate.
payload_raw = os.environ.get("PAYLOAD")
if payload_raw is None:
    sys.exit(0)
try:
    payload = json.loads(payload_raw)
except json.JSONDecodeError:
    sys.exit(0)
# Valid JSON that is not an object (list, string, number, null) has no
# fields to inspect; treat it like any other unusable payload.
if not isinstance(payload, dict):
    sys.exit(0)
if payload.get("hook_event_name") != "PreToolUse":
    sys.exit(0)
if payload.get("tool_name") != "Bash":
    sys.exit(0)
tool_input = payload.get("tool_input")
if not isinstance(tool_input, dict):
    sys.exit(0)
command = tool_input.get("command")
if not isinstance(command, str) or not command.strip():
    sys.exit(0)
try:
    commands = split_simple_commands(command)
except ValueError:
    sys.exit(0)
if any(contains_git_commit(simple_command) for simple_command in commands):
    print("yes")
PY
)"
# Only payloads classified as `git commit` are gated; everything else passes.
[[ "${should_gate}" == "yes" ]] || exit 0

# All later paths (devtools/, scripts/) are relative to the repository root,
# which is the parent of the directory holding this script.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
repo_root="$(cd -- "${script_dir}/.." && pwd)"
cd "${repo_root}"
# Run the devtools lister for changed .c/.h files (path relative to the
# current directory) and pass its output and exit status through unchanged.
# NOTE(review): output is presumably one path per line - the consumer reads
# it with `mapfile -t`; confirm against the helper script.
tracked_changed_c_h_files() {
  local -r lister=./devtools/list-git-changed-c-h-files.sh
  "${lister}"
}
# Print a compact JSON snapshot of the files reported by
# tracked_changed_c_h_files: a list of [path, sha256-hex] pairs in listing
# order. Listed paths that are not regular files are left out. Prints `[]`
# when nothing is listed. Comparing two snapshots shows whether any listed
# file changed content in between.
# Outputs: JSON on stdout.
snapshot_tracked_changed_c_h_files() {
  local -a tracked_files=()
  mapfile -t tracked_files < <(tracked_changed_c_h_files)
  if (( ${#tracked_files[@]} == 0 )); then
    printf '[]'
    return
  fi
  # The heredoc is the Python program; the file list is passed as argv.
  python3 - "${tracked_files[@]}" <<'PY'
import hashlib
import json
import sys
from pathlib import Path

entries = []
for raw_path in sys.argv[1:]:
    path = Path(raw_path)
    if not path.is_file():
        continue
    digest = hashlib.sha256(path.read_bytes()).hexdigest()
    entries.append([path.as_posix(), digest])
print(json.dumps(entries, separators=(",", ":")))
PY
}
# Run the focused repo-policy pipeline against the currently staged tree.
#
# Steps: snapshot the index as a throw-away commit on top of HEAD, build the
# review package for HEAD..snapshot, and - if any check applies - evaluate
# and score it, then print warnings/failures from the summary to stderr.
#
# Returns: 0 when the check is skipped (no HEAD commit, no python3, nothing
#          applicable) or no check has status "fail"; non-zero when a check
#          failed or any pipeline step itself failed.
#
# The caller invokes this function from an `if !` condition, where
# `set -e` has no effect on the function body. Every step is therefore
# checked explicitly; otherwise a failed step would be ignored and only the
# last command would decide the result.
run_repo_policy_focus_check() {
  local tmpdir base_ref tree_id snapshot_commit applicable_count
  local score_status=0

  if ! git rev-parse --verify HEAD >/dev/null 2>&1; then
    return 0
  fi
  if ! command -v python3 >/dev/null 2>&1; then
    return 0
  fi

  tmpdir="$(mktemp -d)" || return 1
  cleanup_repo_policy_focus_check() {
    rm -rf -- "${tmpdir}"
  }
  # Remove the scratch directory on every return path of this function.
  trap cleanup_repo_policy_focus_check RETURN

  base_ref="$(git rev-parse HEAD)" || return 1
  # Commit object for the staged tree; it is not attached to any ref.
  tree_id="$(git write-tree)" || return 1
  snapshot_commit="$(
    printf 'codex repo-policy focus snapshot\n' \
      | git commit-tree "${tree_id}" -p "${base_ref}"
  )" || return 1

  python3 scripts/build_repo_policy_focus_input.py \
    --base "${base_ref}" \
    --head "${snapshot_commit}" \
    --output "${tmpdir}/review-package.json" || return 1

  applicable_count="$(
    python3 - "${tmpdir}/review-package.json" <<'PY'
import json
import sys
from pathlib import Path

package = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
print(int(package.get("applicable_count", 0)))
PY
  )" || return 1
  if [[ "${applicable_count}" == "0" ]]; then
    return 0
  fi

  python3 scripts/evaluate_repo_policy_focus.py \
    --input "${tmpdir}/review-package.json" \
    --output "${tmpdir}/model-output.json" || return 1

  # Remember a scorer failure but still print the report if a summary was
  # written, so the reasons are visible to the user.
  python3 scripts/score_repo_policy_focus.py \
    --review-input "${tmpdir}/review-package.json" \
    --model-output "${tmpdir}/model-output.json" \
    --summary-md "${tmpdir}/summary.md" \
    --summary-json "${tmpdir}/summary.json" || score_status=$?
  if [[ ! -f "${tmpdir}/summary.json" ]]; then
    return 1
  fi

  python3 - "${tmpdir}/summary.json" <<'PY' || return 1
import json
import sys
from pathlib import Path


def report(header, checks):
    """Print one section of checks with their issues to stderr."""
    print(header, file=sys.stderr)
    for check in checks:
        print(f"- {check['id']}: {check['reason']}", file=sys.stderr)
        for issue in check["issues"]:
            location = issue["file"] or "(no file)"
            if issue["line"]:
                location = f"{location}:{issue['line']}"
            print(f" * {location} - {issue['message']}", file=sys.stderr)


summary = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
warnings = [check for check in summary["checks"] if check["status"] == "warn"]
failures = [check for check in summary["checks"] if check["status"] == "fail"]
if warnings:
    report("Focused repo-policy warnings:", warnings)
if failures:
    report("Focused repo-policy checks failed:", failures)
    raise SystemExit(1)
PY
  return "${score_status}"
}
# Print a PreToolUse "deny" decision on stdout.
# Arguments: $1 - reason text; inserted verbatim, so it must not contain
#                 characters that need JSON escaping (quotes, backslashes).
emit_deny_decision() {
  cat <<EOF
{
  "hookSpecificOutput": {
    "hookEventName": "PreToolUse",
    "permissionDecision": "deny",
    "permissionDecisionReason": "$1"
  }
}
EOF
}

# Step 1: repo-policy checks. The hook itself always exits 0; a refusal is
# communicated through the JSON decision.
if ! run_repo_policy_focus_check; then
  emit_deny_decision "Deterministic repo-policy checks failed. Fix the reported invariant violations before running git commit again."
  exit 0
fi

# Step 2: formatting is only enforced when clang-format-18 is installed.
if ! command -v clang-format-18 >/dev/null 2>&1; then
  exit 0
fi

# First output line is "changed=yes|no"; any following lines are .c/.h files
# that have both staged and unstaged modifications (partially staged).
git_c_h_change_state="$(
  python3 <<'PY'
import subprocess


def listed_diff(*args: str) -> set[str]:
    """Return non-deleted *.c/*.h paths reported by `git diff <args>`."""
    proc = subprocess.run(
        ["git", "diff", *args, "--diff-filter=d", "--name-only", "--", "*.c", "*.h"],
        check=True,
        text=True,
        capture_output=True,
    )
    return {line for line in proc.stdout.splitlines() if line}


unstaged = listed_diff()
staged = listed_diff("--cached")
print("changed=yes" if (unstaged or staged) else "changed=no")
for path in sorted(unstaged & staged):
    print(path)
PY
)"
if [[ "${git_c_h_change_state}" == changed=no* ]]; then
  exit 0
fi

# Re-adding a partially staged file after formatting would also stage the
# hunks the user deliberately left out, so refuse instead.
partially_staged_c_h_files="$(printf '%s\n' "${git_c_h_change_state}" | sed '1d')"
if [[ -n "${partially_staged_c_h_files}" ]]; then
  emit_deny_decision "Commit blocked: partially staged .c/.h files are present. The formatter hook cannot safely restage those files automatically."
  exit 0
fi

# Remember what was staged so that exactly those files are restaged if the
# formatter rewrites anything.
mapfile -t initially_staged_c_h_files < <(
  git diff --cached --diff-filter=d --name-only -- '*.c' '*.h'
)
snapshot_files="$(snapshot_tracked_changed_c_h_files)"

# Formatter output goes to stderr; stdout is reserved for the JSON decision.
if ./devtools/format-code.sh --git-changed >&2; then
  snapshot_after="$(snapshot_tracked_changed_c_h_files)"
  if [[ "${snapshot_files}" != "${snapshot_after}" ]] \
    && (( ${#initially_staged_c_h_files[@]} > 0 )); then
    git add -- "${initially_staged_c_h_files[@]}"
  fi
  exit 0
fi

emit_deny_decision "Formatting failed: ./devtools/format-code.sh exited non-zero. Fix formatting before running git commit."