#!/bin/bash
# =============================================================================
# DNS-Audit -- Cluster-weite DNS/Regel-Konsistenzpruefung
# =============================================================================
#
# ZWECK:
#   Verifiziert dass fuer JEDEN der 5 mTLS-Cluster-Server die in
#   dns-maintenance.sh hinterlegte Regel-Soll-Liste (is_record_expected +
#   ROOT_RECORDS_SRV1) exakt mit dem im Cloudflare-DNS tatsaechlich
#   vorhandenen Zustand uebereinstimmt.
#
# WARUM WICHTIG:
#   dns-maintenance.sh "restore" ist REGEL-basiert, nicht state-basiert.
#   Wenn die Regeln von der Realitaet abdriften (z.B. neue Stage/Service
#   hinzugefuegt aber is_record_expected() nicht angepasst), dann wuerde
#   ein drain+restore Records permanent verlieren. Dieses Audit fuehrt
#   genau diesen Abgleich durch -- wenn es gruene Lichter liefert,
#   ist drain+restore bijektiv (lossless).
#
# USAGE:
#   ./dns-audit.sh                     # volle Pruefung, exit 0 wenn sauber
#   ./dns-audit.sh --quick             # nur Counts pro Server, schneller
#   ./dns-audit.sh --server N          # nur Server N (1..5)
#
# ENV: CF_TOKEN_BUSINESS, CF_TOKEN_SERVICES muessen gesetzt sein.
# =============================================================================

# Strict mode: stop on command failures, unset variables and failing pipeline stages.
set -euo pipefail

# ANSI colour sequences, stored as literal backslash escapes; they are
# interpreted later by `echo -e` and by printf format strings.
NC='\033[0m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
GREEN='\033[0;32m'

# Cloudflare zone ids of the two audited zones.
ZONE_ID_SERVICES="ada86c619846d0486e721dd85381ad00"
ZONE_ID_BUSINESS="36e79a0dc1c69628fa1e23aae9d5e9b8"

# Per-server tables, keyed by server number 1..5. The addresses are compared
# against the content of A / AAAA records; the names are used for display.
declare -A SERVER_NAME SERVER_IPV4 SERVER_IPV6

SERVER_NAME[1]="Cert-Server-1-NBG"
SERVER_IPV4[1]="188.245.157.241"
SERVER_IPV6[1]="2a01:4f8:c0c:af60::1"

SERVER_NAME[2]="Cert-Server-0-NBG"
SERVER_IPV4[2]="116.203.243.240"
SERVER_IPV6[2]="2a01:4f8:1c1a:b9f1::1"

SERVER_NAME[3]="Cert-Server-0-FSN"
SERVER_IPV4[3]="49.12.76.141"
SERVER_IPV6[3]="2a01:4f8:c17:7411::1"

SERVER_NAME[4]="Cert-Server-1-FSN"
SERVER_IPV4[4]="91.98.147.79"
SERVER_IPV6[4]="2a01:4f8:c012:e96c::1"

SERVER_NAME[5]="Cert-Server-HEL"
SERVER_IPV4[5]="46.62.131.226"
SERVER_IPV6[5]="2a01:4f9:c012:4f8f::1"

# ---- Expected-state rules: 1:1 mirror of dns-maintenance.sh ----
# Hostnames are built as <service>.<stage>.bahn.<domain>.
DOMAINS=("business" "services")
STAGES=("dev" "int" "tst" "lup" "pen" "pres" "abn" "prd")
SERVICES=("cs" "cert" "ccc" "cs-b" "cs-bw" "ccc-b" "ccc-bw")

# Stage-less root records that are expected on server 1 only.
ROOT_RECORDS_SRV1=("ccc.bahn.business" "cs.bahn.business" "ccc.bahn.services" "cert.bahn.services" "cs.bahn.services")

# Decide whether <service>.<stage>.bahn.<domain> belongs to the rule set of a server.
#
# Arguments: $1 - server number
#            $2 - service, $3 - stage, $4 - domain
# Returns:   0 if the record is expected, 1 if one of the exclusions below applies.
is_record_expected() {
    local server_num=$1 service=$2 stage=$3 domain=$4

    # Classify the service once; the exclusions only distinguish these groups.
    local kind="other"
    case "$service" in
        cs | cert | ccc) kind="core" ;;
        cs-b | ccc-b | cs-bw | ccc-bw) kind="b" ;;
    esac

    # Core services: tst records in "services" exist on server 1 only.
    if [[ "$stage" == "tst" && "$domain" == "services" && "$server_num" -ne 1 && "$kind" == "core" ]]; then
        return 1
    fi

    # cert in "business" exists on the dev stage only.
    if [[ "$service" == "cert" && "$domain" == "business" && "$stage" != "dev" ]]; then
        return 1
    fi

    # -b / -bw variants.
    if [[ "$kind" == "b" ]]; then
        case "${domain}:${stage}" in
            business:prd | business:abn)
                # Never present, on any server.
                return 1
                ;;
            business:dev | services:prd)
                # Present on server 1 only.
                if [[ "$server_num" -ne 1 ]]; then
                    return 1
                fi
                ;;
        esac
    fi

    return 0
}

# Print every hostname the rule set expects for one server, one per line.
#
# Globals:   SERVICES, STAGES, DOMAINS, ROOT_RECORDS_SRV1 (read)
# Arguments: $1 - server number
# Outputs:   "<service>.<stage>.bahn.<domain>" for every combination accepted by
#            is_record_expected, followed by ROOT_RECORDS_SRV1 when $1 is 1.
get_expected_hostnames() {
    local server_num=$1
    # Loop variables are local so a direct (non-subshell) call cannot clobber
    # same-named variables of the caller.
    local service stage domain root

    for service in "${SERVICES[@]}"; do
        for stage in "${STAGES[@]}"; do
            for domain in "${DOMAINS[@]}"; do
                if is_record_expected "$server_num" "$service" "$stage" "$domain"; then
                    printf '%s.%s.bahn.%s\n' "$service" "$stage" "$domain"
                fi
            done
        done
    done

    # The stage-less root records are expected on server 1 only.
    if [[ "$server_num" -eq 1 ]]; then
        for root in "${ROOT_RECORDS_SRV1[@]}"; do
            printf '%s\n' "$root"
        done
    fi
}

# ---- Cloudflare-API: vollstaendig paginiert ----
# Download all DNS records of one Cloudflare zone as a single JSON array.
#
# Arguments: $1 - Cloudflare zone id
#            $2 - API token, sent as "Authorization: Bearer <token>"
# Outputs:   stdout - JSON array holding the concatenated `.result` entries of all pages
#            stderr - a diagnostic when the download fails
# Returns:   0 on success; 1 if curl fails, a response is not a successful API
#            envelope with an array result, or the page cap is exceeded.
#
# The function is meant to be called inside $(...). Bash clears errexit in
# command substitutions, so every step is checked explicitly here. Without
# these checks a network or authentication failure would be returned as an
# empty record list and the audit would treat the zone as empty.
fetch_zone_records() {
    local zone_id=$1 token=$2
    local -r per_page=100 max_pages=50
    local page=1 all='[]' resp cnt

    while true; do
        if ! resp=$(curl -sS "https://api.cloudflare.com/client/v4/zones/${zone_id}/dns_records?per_page=${per_page}&page=${page}" \
            -H "Authorization: Bearer ${token}"); then
            echo "fetch_zone_records: request failed (zone ${zone_id}, page ${page})" >&2
            return 1
        fi

        # An error envelope carries success=false and a null result; reject it
        # instead of counting it as an empty page.
        if ! jq -e '.success == true and (.result | type == "array")' <<<"$resp" >/dev/null 2>&1; then
            echo "fetch_zone_records: Cloudflare API error (zone ${zone_id}, page ${page}): $(jq -c '.errors' <<<"$resp" 2>/dev/null || echo 'unparsable response')" >&2
            return 1
        fi

        cnt=$(jq '.result | length' <<<"$resp") || return 1
        # An empty page marks the end of the listing.
        if [[ "$cnt" -eq 0 ]]; then
            break
        fi

        all=$(jq -s '.[0] + .[1].result' <(printf '%s\n' "$all") <(printf '%s\n' "$resp")) || return 1
        page=$((page + 1))

        # Safety cap against endless pagination. Hitting it means the listing
        # may be truncated, which would make the comparison unreliable.
        if [[ "$page" -gt "$max_pages" ]]; then
            echo "fetch_zone_records: more than ${max_pages} pages for zone ${zone_id}; refusing to return a possibly truncated result" >&2
            return 1
        fi
    done

    printf '%s\n' "$all"
}

# Print the names of all A / AAAA records whose content is the address of one server.
#
# Globals:   SERVER_IPV4, SERVER_IPV6 (read); ALL_JSON (read, only when $2 is omitted)
# Arguments: $1 - server number (key into SERVER_IPV4 / SERVER_IPV6)
#            $2 - optional JSON array of DNS records; defaults to $ALL_JSON,
#                 or to an empty array when neither is set
# Outputs:   one record name per line on stdout
# Returns:   the exit status of jq
#
# The JSON is passed to jq explicitly and jq errors are no longer suppressed.
# Previously the server number itself was fed to jq and the resulting error
# was hidden, so the function always printed nothing.
actual_hostnames_for_server() {
    local srv=$1
    local json=${2:-${ALL_JSON:-}}
    local v4=${SERVER_IPV4[$srv]} v6=${SERVER_IPV6[$srv]}

    if [[ -z "$json" ]]; then
        json='[]'
    fi

    jq -r --arg v4 "$v4" --arg v6 "$v6" \
       '.[] | select((.type=="A" and .content==$v4) or (.type=="AAAA" and .content==$v6)) | .name' \
       <<<"$json"
}

# ---- Main ----
MODE="full"
TARGET_SERVER=""
while [[ $# -gt 0 ]]; do
    case "$1" in
        --quick)
            MODE="quick"
            shift
            ;;
        --server)
            # Reject a missing or out-of-range value here. Otherwise the server
            # loop below would skip every server and the audit would pass
            # without having checked anything.
            if [[ $# -lt 2 || ! "${2-}" =~ ^[1-5]$ ]]; then
                echo "--server requires a server number (1..5)" >&2
                exit 1
            fi
            TARGET_SERVER=$2
            shift 2
            ;;
        -h|--help)
            # The header comment occupies lines 2-26 of this file.
            sed -n '2,26p' "$0"
            exit 0
            ;;
        *)
            echo "Unknown arg: $1" >&2
            exit 1
            ;;
    esac
done

if [[ -z "${CF_TOKEN_BUSINESS:-}" || -z "${CF_TOKEN_SERVICES:-}" ]]; then
    echo -e "${RED}CF_TOKEN_BUSINESS/CF_TOKEN_SERVICES nicht gesetzt${NC}" >&2
    exit 2
fi

# Print the number of non-empty lines in $1 (0 for an empty string).
# `wc -l` would report 1 for an empty list because echo still emits a newline.
count_lines() {
    # grep -c exits with status 1 when the count is 0; that is not an error here.
    grep -c . <<<"$1" || true
}

echo -e "${BLUE}=== DNS-Audit Cluster Coverage ===${NC}"
echo "Fetching zone records..."
BUSINESS_JSON=$(fetch_zone_records "$ZONE_ID_BUSINESS" "$CF_TOKEN_BUSINESS")
SERVICES_JSON=$(fetch_zone_records "$ZONE_ID_SERVICES" "$CF_TOKEN_SERVICES")
# Both zones are merged into one array; records are attributed to servers by
# their A / AAAA content further down.
ALL_JSON=$(jq -s '.[0] + .[1]' <(echo "$BUSINESS_JSON") <(echo "$SERVICES_JSON"))

B_CNT=$(echo "$BUSINESS_JSON" | jq 'length')
S_CNT=$(echo "$SERVICES_JSON" | jq 'length')
echo "  bahn.business: $B_CNT records, bahn.services: $S_CNT records"
echo ""

exit_code=0
for n in 1 2 3 4 5; do
    if [[ -n "$TARGET_SERVER" && "$TARGET_SERVER" != "$n" ]]; then
        continue
    fi
    v4=${SERVER_IPV4[$n]} v6=${SERVER_IPV6[$n]} name=${SERVER_NAME[$n]}

    # Both lists are sorted and de-duplicated because comm needs sorted input.
    expected=$(get_expected_hostnames "$n" | sort -u)
    actual=$(echo "$ALL_JSON" | jq -r --arg v4 "$v4" --arg v6 "$v6" \
        '.[] | select((.type=="A" and .content==$v4) or (.type=="AAAA" and .content==$v6)) | .name' | sort -u)
    exp_count=$(count_lines "$expected")
    act_count=$(count_lines "$actual")
    # only_dns:  present in DNS but not covered by the rules.
    # only_rule: expected by the rules but absent from DNS.
    only_dns=$(comm -23 <(echo "$actual") <(echo "$expected") || true)
    only_rule=$(comm -13 <(echo "$actual") <(echo "$expected") || true)

    if [[ "$MODE" == "quick" ]]; then
        if [[ -z "$only_dns" && -z "$only_rule" ]]; then
            printf "  [%d] %-22s ${GREEN}OK${NC} (expected=%d, actual=%d, bijektiv)\n" "$n" "$name" "$exp_count" "$act_count"
        else
            printf "  [%d] %-22s ${RED}DRIFT${NC} (only-DNS=%d only-rule=%d)\n" "$n" "$name" \
                "$(count_lines "$only_dns")" "$(count_lines "$only_rule")"
            exit_code=1
        fi
        continue
    fi

    echo -e "${BLUE}=== [$n] $name ($v4 / $v6) ===${NC}"
    echo "  Expected (Regel-Soll): $exp_count hostnames"
    echo "  Actual (DNS-Ist):      $act_count hostnames"

    if [[ -z "$only_dns" && -z "$only_rule" ]]; then
        echo -e "  ${GREEN}[PASS] Regel-Soll == DNS-Ist. Drain+Restore waere lossless.${NC}"
    fi

    if [[ -n "$only_dns" ]]; then
        echo -e "  ${RED}[FAIL] DNS-Records ohne Regel-Coverage (Drain wuerde loeschen, Restore nicht wiederherstellen):${NC}"
        echo "$only_dns" | sed 's/^/    -> /'
        exit_code=1
    fi

    # NOTE(review): in full mode rule-only drift is informational and leaves
    # exit_code untouched, while quick mode above counts it as DRIFT (exit 1).
    # Confirm which of the two is intended.
    if [[ -n "$only_rule" ]]; then
        echo -e "  ${YELLOW}[INFO] Regel-Soll-Records die im DNS fehlen (Restore wuerde neu anlegen):${NC}"
        echo "$only_rule" | sed 's/^/    -> /'
    fi
    echo ""
done

if [[ $exit_code -eq 0 ]]; then
    echo -e "${GREEN}=== Audit PASSED -- dns-maintenance.sh drain/restore ist cluster-weit bijektiv ===${NC}"
else
    echo -e "${RED}=== Audit FAILED -- Regeln in dns-maintenance.sh und CF-DNS driften auseinander ===${NC}"
    echo "Naechster Schritt: is_record_expected() oder ROOT_RECORDS_SRV1 in dns-maintenance.sh anpassen, ODER die Rogue-DNS-Records manuell beheben."
fi

exit $exit_code
