mirror of
https://github.com/rsyslog/rsyslog.git
synced 2026-06-15 16:22:41 +02:00
- Add optional impstats sidecar on collector server via init.sh
- Prompts or SERVER_IMPSTATS_SIDECAR=true
- check_sidecar_requirements() prompts for python3-venv on Debian/Ubuntu
- Auto-add server to node and impstats Prometheus targets (no duplicates)
- Traefik: separate rate-limit-grafana (600/min, burst 300) for Grafana
- Reduces 429 errors on dashboard reload
- Host Metrics: Node Status Overview panel height set to 10
- Fixed height; table scrolls for many hosts
- Docs: installation, grafana_dashboards, client_setup, troubleshooting
- SERVER_IMPSTATS_SIDECAR, server auto-registration, 429, ensurepip
323 lines
10 KiB
YAML
323 lines
10 KiB
YAML
# Central log collector stack: Traefik (edge proxy), Loki, Grafana, Prometheus,
# two rsyslog receivers (plain and TLS) and an nginx container for downloads.
# All services attach to the pre-existing network "rosi-collector-net".
# Variables such as TRAEFIK_DOMAIN and TRAEFIK_EMAIL are interpolated by
# Compose; grafana and the rsyslog services additionally load .env via env_file.
services:
  # Edge proxy. Entrypoints: web (:80), websecure (:443), prometheus (:9090).
  # Requests on "web" are redirected to "websecure" using https.
  # Certificates come from the "letsencrypt" resolver (ACME TLS challenge).
  traefik:
    image: traefik:v3.6.2
    container_name: traefik-central
    restart: unless-stopped
    command:
      - --api.dashboard=true
      - --api.insecure=false
      - --ping=true
      - --providers.docker=true
      - --providers.docker.exposedbydefault=false
      - --providers.docker.network=rosi-collector-net
      - --providers.file.filename=/etc/traefik/dynamic.yml
      - --providers.file.watch=true
      - --entrypoints.web.address=:80
      - --entrypoints.websecure.address=:443
      - --entrypoints.prometheus.address=:9090
      - --certificatesresolvers.letsencrypt.acme.tlschallenge=true
      - --certificatesresolvers.letsencrypt.acme.email=${TRAEFIK_EMAIL}
      - --certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json
      - --entrypoints.web.http.redirections.entrypoint.to=websecure
      - --entrypoints.web.http.redirections.entrypoint.scheme=https
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
    ports:
      - "80:80"
      - "443:443"
      - "9090:9090"
    volumes:
      # Read-only Docker socket: needed by the docker provider to read labels.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./traefik/letsencrypt:/letsencrypt
      - ./traefik/dynamic.yml:/etc/traefik/dynamic.yml:ro
    networks:
      - rosi-collector-net
    healthcheck:
      # Relies on --ping=true in the command list above.
      test: ["CMD", "traefik", "healthcheck", "--ping"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 512M
        reservations:
          cpus: '0.25'
          memory: 128M

  # Log store. Published on the host loopback interface only (127.0.0.1:3100).
  loki:
    image: grafana/loki:2.9.6
    command: -config.file=/etc/loki/local-config.yml
    ports:
      - "127.0.0.1:3100:3100"
    volumes:
      - loki-data:/loki
      - ./loki-config.yml:/etc/loki/local-config.yml:ro
    restart: unless-stopped
    networks:
      - rosi-collector-net
    healthcheck:
      # The rsyslog services wait for this check (condition: service_healthy).
      test: ["CMD-SHELL", "wget -qO- http://localhost:3100/ready >/dev/null 2>&1 || exit 1"]
      interval: 10s
      timeout: 3s
      retries: 10
      start_period: 10s
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL

  # Dashboards. Published on host loopback (127.0.0.1:3000) and routed by
  # Traefik on "websecure" for the TRAEFIK_DOMAIN host, except /downloads.
  grafana:
    image: grafana/grafana:11.4.0
    env_file: .env
    environment:
      # NOTE(review): falls back to "admin" when GRAFANA_ADMIN_PASSWORD is
      # unset; set a real password in .env for anything reachable externally.
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_SERVER_ROOT_URL: https://${TRAEFIK_DOMAIN}/
      GF_SERVER_DOMAIN: ${TRAEFIK_DOMAIN}
      # Increase datasource proxy limits to handle more concurrent requests
      GF_DATAPROXY_MAX_IDLE_CONNS_PER_HOST: "300"
      GF_DATAPROXY_MAX_IDLE_CONNS: "300"
      GF_DATAPROXY_TIMEOUT: "60"
      GF_DATAPROXY_KEEP_ALIVE: "60"
      # Increase query timeout
      GF_DATAPROXY_QUERY_TIMEOUT: "60"
      # Allow more concurrent requests per datasource
      GF_DATAPROXY_MAX_CONCURRENT_REQUESTS: "250"
      # Increase response limit for large queries
      GF_DATAPROXY_RESPONSE_LIMIT: "0"
      # SMTP configuration for alerting
      GF_SMTP_ENABLED: ${SMTP_ENABLED:-false}
      GF_SMTP_HOST: ${SMTP_HOST:-}
      GF_SMTP_PORT: ${SMTP_PORT:-587}
      GF_SMTP_USER: ${SMTP_USER:-}
      GF_SMTP_PASSWORD: ${SMTP_PASSWORD:-}
      GF_SMTP_FROM_ADDRESS: ${ALERT_EMAIL_FROM:-}
      GF_SMTP_FROM_NAME: "Rsyslog Central Alerts"
      GF_SMTP_SKIP_VERIFY: ${SMTP_SKIP_VERIFY:-false}
      # Set default home dashboard to Syslog Explorer
      GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH: /etc/grafana/provisioning/dashboards/generated/syslog-explorer.json
    ports:
      - "127.0.0.1:3000:3000"
    volumes:
      - grafana-data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
    depends_on:
      - loki
      - traefik
    restart: unless-stopped
    networks:
      - rosi-collector-net
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://localhost:3000/api/health || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 30s
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G
        reservations:
          cpus: '0.25'
          memory: 256M
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL
    labels:
      - "traefik.enable=true"
      # Priority 1 here vs. 100 on the downloads router, which shares the host.
      - 'traefik.http.routers.grafana.rule=Host(`${TRAEFIK_DOMAIN}`) && !PathPrefix(`/downloads`)'
      - "traefik.http.routers.grafana.entrypoints=websecure"
      - "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
      - "traefik.http.routers.grafana.middlewares=security-headers@file,rate-limit-grafana@file"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
      - "traefik.http.routers.grafana.priority=1"

  # Metrics store. No host port of its own; reached through Traefik's
  # "prometheus" entrypoint (:9090) behind the admin-auth middleware.
  prometheus:
    image: prom/prometheus:v3.1.0
    container_name: prometheus-central
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --web.console.libraries=/usr/share/prometheus/console_libraries
      - --web.console.templates=/usr/share/prometheus/consoles
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus-targets:/etc/prometheus/targets:ro
      - prometheus-data:/prometheus
    restart: unless-stopped
    networks:
      - rosi-collector-net
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://localhost:9090/-/healthy || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 15s
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G
        reservations:
          cpus: '0.25'
          memory: 256M
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL
    labels:
      - "traefik.enable=true"
      - 'traefik.http.routers.prometheus.rule=Host(`${TRAEFIK_DOMAIN}`)'
      - "traefik.http.routers.prometheus.entrypoints=prometheus"
      - "traefik.http.routers.prometheus.tls.certresolver=letsencrypt"
      - "traefik.http.routers.prometheus.middlewares=admin-auth@file,security-headers@file,rate-limit@file"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"

  # Plain syslog receiver: host 514/udp -> container 514/udp and
  # host 10514/tcp -> container 514/tcp, on both IPv4 and IPv6.
  rsyslog:
    # NOTE: Defaulting to :latest is intentional - we want users to always get
    # the newest rsyslog-collector image with security fixes. For production
    # pinning, set RSYSLOG_IMAGE (shell environment or .env) or override the
    # image in docker-compose.override.yml.
    image: ${RSYSLOG_IMAGE:-rsyslog/rsyslog-collector:latest}
    env_file: .env
    environment:
      WRITE_JSON_FILE: ${WRITE_JSON_FILE:-off}
    container_name: rsyslog-central
    ports:
      - "0.0.0.0:514:514/udp"
      - "[::]:514:514/udp"
      - "0.0.0.0:10514:514/tcp"
      - "[::]:10514:514/tcp"
    volumes:
      - /var/log/rsyslog-central:/var/log
      - ./rsyslog.conf/30-send-loki-http.conf:/etc/rsyslog.d/30-send-loki-http.conf:ro
      - ./rsyslog.conf/80-file-output.conf:/etc/rsyslog.d/80-file-output.conf:ro
    restart: unless-stopped
    depends_on:
      loki:
        condition: service_healthy
    networks:
      - rosi-collector-net
    # Healthcheck: container runs rsyslog in foreground (-n), so no PID file exists; check process instead.
    healthcheck:
      test: ["CMD-SHELL", "pidof rsyslogd >/dev/null || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL
    cap_add:
      - NET_BIND_SERVICE
      - SYSLOG

  # TLS-enabled rsyslog on port 6514 (only starts when SYSLOG_TLS_ENABLED=true)
  # Uses the built-in TLS support from rsyslog-collector image (PR #6336)
  # In this file the service is gated by the "tls" Compose profile.
  rsyslog-tls:
    # NOTE: Defaulting to :latest is intentional - same as rsyslog service
    # above; RSYSLOG_IMAGE pins both services to the same image.
    image: ${RSYSLOG_IMAGE:-rsyslog/rsyslog-collector:latest}
    env_file: .env
    environment:
      WRITE_JSON_FILE: ${WRITE_JSON_FILE:-off}
      # Disable plain TCP/UDP since the main rsyslog container handles those
      ENABLE_UDP: "off"
      ENABLE_TCP: "off"
      # Enable TLS on port 6514
      ENABLE_TLS: "on"
      TLS_CA_FILE: "/etc/rsyslog.d/certs/ca.pem"
      TLS_CERT_FILE: "/etc/rsyslog.d/certs/server-cert.pem"
      TLS_KEY_FILE: "/etc/rsyslog.d/certs/server-key.pem"
      # x509/certvalid = accept any client with valid CA-signed cert
      # x509/name = require StreamDriverPermittedPeers config (stricter)
      TLS_AUTH_MODE: "x509/certvalid"
    container_name: rsyslog-tls-central
    profiles:
      - tls
    ports:
      - "0.0.0.0:6514:6514/tcp"
      - "[::]:6514:6514/tcp"
    volumes:
      - /var/log/rsyslog-central:/var/log
      - ./rsyslog.conf/30-send-loki-http.conf:/etc/rsyslog.d/30-send-loki-http.conf:ro
      - ./rsyslog.conf/80-file-output.conf:/etc/rsyslog.d/80-file-output.conf:ro
      # Certificates referenced by the TLS_*_FILE variables above.
      - ./certs:/etc/rsyslog.d/certs:ro
    restart: unless-stopped
    depends_on:
      loki:
        condition: service_healthy
    networks:
      - rosi-collector-net
    # Healthcheck: container runs rsyslog in foreground (-n), so no PID file exists; check process instead.
    healthcheck:
      test: ["CMD-SHELL", "pidof rsyslogd >/dev/null || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL
    cap_add:
      - NET_BIND_SERVICE
      - SYSLOG

  # Static file server for ./downloads, routed by Traefik under /downloads.
  # Runs as uid/gid 101 on a read-only root filesystem; the tmpfs mounts
  # provide the writable paths.
  downloads:
    image: nginx:1.27.3-alpine
    container_name: downloads-central
    restart: unless-stopped
    user: "101:101"
    read_only: true
    tmpfs:
      - /var/cache/nginx:uid=101,gid=101
      - /var/run:uid=101,gid=101
      - /tmp:uid=101,gid=101
    volumes:
      - ./downloads:/usr/share/nginx/html/downloads:ro
    networks:
      - rosi-collector-net
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:80/ || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
        reservations:
          cpus: '0.1'
          memory: 32M
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL
    labels:
      - "traefik.enable=true"
      - 'traefik.http.routers.downloads.rule=Host(`${TRAEFIK_DOMAIN}`) && PathPrefix(`/downloads`)'
      - "traefik.http.routers.downloads.entrypoints=websecure"
      - "traefik.http.routers.downloads.tls.certresolver=letsencrypt"
      - "traefik.http.routers.downloads.middlewares=security-headers@file"
      - "traefik.http.services.downloads.loadbalancer.server.port=80"
      - "traefik.http.routers.downloads.priority=100"

# Named volumes holding service state (default settings).
volumes:
  loki-data:
  grafana-data:
  prometheus-data:

networks:
  # External network: it must already exist before "up"; Compose does not
  # create it. No driver is declared here because the Compose specification
  # treats every attribute other than "name" as irrelevant on external
  # resources.
  rosi-collector-net:
    name: rosi-collector-net
    external: true