#!/bin/bash
# Configuration
# Both settings can be overridden from the environment; when they are not set
# the defaults below (the previously hard-coded values) are used.
# Base URL of the Patroni REST API. A trailing slash is dropped so that
# "${PATRONI_API}/cluster" never contains a double slash.
PATRONI_API="${PATRONI_API:-http://localhost:8008}"
PATRONI_API="${PATRONI_API%/}"
# Largest replication lag, in bytes, at which a switchover is still allowed.
MAX_ALLOWED_LAG="${MAX_ALLOWED_LAG:-1048576}" # 1 MiB lag threshold
# The threshold is used in arithmetic comparisons, where a non-numeric value
# would silently evaluate as 0, so reject it up front.
if [[ ! "$MAX_ALLOWED_LAG" =~ ^[0-9]+$ ]]; then
  echo "Error: MAX_ALLOWED_LAG must be a non-negative integer (bytes), got '${MAX_ALLOWED_LAG}'" >&2
  exit 1
fi
#######################################
# Print the current WAL position (LSN, in bytes) of a cluster member.
#
# The member fields documented for Patroni's GET /cluster (name, role, state,
# api_url, host, port, timeline, lag, ...) do not include xlog_location, so
# selecting that field alone yields the literal string "null". A numeric
# xlog_location is still honoured when present; otherwise the position is read
# from the member's own REST endpoint (its api_url), where a primary reports
# xlog.location and a replica reports xlog.received_location and
# xlog.replayed_location.
#
# Globals:   PATRONI_API (read)
# Arguments: $1 - member name as listed by /cluster
# Outputs:   the LSN as a decimal integer on stdout; nothing if unavailable
# Returns:   0 if an LSN was printed, 1 otherwise
#######################################
get_lsn() {
  local NODE=$1
  local cluster member lsn api_url status

  # -f turns HTTP error statuses into a non-zero exit; --max-time bounds the
  # wait on an unresponsive endpoint.
  cluster=$(curl -sf --max-time 10 "${PATRONI_API}/cluster") || return 1

  # --arg hands the name to jq as data, so quotes or backslashes in it cannot
  # break or alter the filter. first() keeps a single member even if the name
  # were listed twice.
  member=$(jq -c --arg name "$NODE" \
    'first(.members[]? | select(.name == $name))' <<<"$cluster") || return 1
  [[ -n "$member" ]] || return 1

  lsn=$(jq -r '.xlog_location // empty' <<<"$member")
  if [[ ! "$lsn" =~ ^[0-9]+$ ]]; then
    api_url=$(jq -r '.api_url // empty' <<<"$member")
    [[ -n "$api_url" ]] || return 1
    status=$(curl -sf --max-time 10 "$api_url") || return 1
    # A primary has only xlog.location; for a replica take the furthest of the
    # received and replayed positions.
    lsn=$(jq -r '[.xlog.location?, .xlog.received_location?, .xlog.replayed_location?]
      | map(numbers) | max // empty' <<<"$status")
  fi

  # Only ever print a plain decimal number; callers feed it to shell arithmetic.
  [[ "$lsn" =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "$lsn"
}
# Check prerequisites: the script shells out to curl and jq for every step, and
# a missing tool would otherwise only show up as empty values further down.
for REQUIRED_TOOL in curl jq; do
  if ! command -v "$REQUIRED_TOOL" >/dev/null 2>&1; then
    echo "Error: Required command '${REQUIRED_TOOL}' not found in PATH" >&2
    exit 1
  fi
done
# Check if Patroni API is accessible
# -f makes HTTP error statuses (4xx/5xx) count as a failure instead of a
# successful fetch of an error page; --max-time keeps an unresponsive endpoint
# from hanging the script.
if ! curl -sf --max-time 10 -o /dev/null "${PATRONI_API}/cluster"; then
  echo "Error: Unable to reach Patroni API at ${PATRONI_API}" >&2
  exit 1
fi
# Fetch current cluster status
echo "Fetching Patroni cluster status..."
CLUSTER_INFO=$(curl -sf --max-time 10 "${PATRONI_API}/cluster")
# Stop early if the response is not the expected document; otherwise every
# later jq lookup would quietly return nothing.
if [[ -z "$CLUSTER_INFO" ]] ||
  ! jq -e '.members | type == "array"' <<<"$CLUSTER_INFO" >/dev/null 2>&1; then
  echo "Error: Unexpected response from ${PATRONI_API}/cluster" >&2
  exit 1
fi
# Extract leader and replicas
# The documented /cluster response has no top-level "leader" key; the leader is
# the member whose role is "leader" (or "standby_leader" in a standby cluster).
# A string-valued top-level "leader" is still preferred if one is present.
LEADER=$(jq -r 'first(
    (.leader | strings),
    (.members[] | select(.role == "leader" or .role == "standby_leader") | .name)
  )' <<<"$CLUSTER_INFO")
# Synchronous standbys are listed with their own roles rather than "replica";
# they are valid switchover candidates and must not be filtered out.
REPLICAS=$(jq -r '.members[]
  | select(.role == "replica" or .role == "sync_standby" or .role == "quorum_standby")
  | .name' <<<"$CLUSTER_INFO")
if [[ -z "$LEADER" ]]; then
  echo "Error: No leader found in the cluster status. Exiting." >&2
  exit 1
fi
if [[ -z "$REPLICAS" ]]; then
  echo "Error: No replicas available as switchover target. Exiting." >&2
  exit 1
fi
echo "Current Leader: $LEADER"
echo "Available Replicas:"
echo "$REPLICAS"
# Get current LSN for leader
LEADER_LSN=$(get_lsn "$LEADER")
echo "Leader LSN: $LEADER_LSN"
# Prompt user for the target leader
# -r keeps backslashes in the typed name literal.
read -r -p "Enter the target replica to become the new leader: " TARGET
# Validate input
# The name must equal one full line of the replica list: -F compares it as a
# fixed string (no regex metacharacters), -x requires a whole-line match (so
# "node" does not pass for "pg-node-1"), and -- protects names starting with
# "-". Empty input is rejected explicitly.
if [[ -z "$TARGET" ]] || ! grep -Fxq -- "$TARGET" <<<"$REPLICAS"; then
  echo "Error: Invalid replica name. Exiting." >&2
  exit 1
fi
# Get LSN for target replica
TARGET_LSN=$(get_lsn "$TARGET")
echo "Target Replica LSN: $TARGET_LSN"
# Re-read the leader position now: the earlier value was taken before the
# interactive prompt and may be arbitrarily old. Reading the leader after the
# replica can only overstate the lag, which is the safe direction.
LEADER_LSN=$(get_lsn "$LEADER")
echo "Leader LSN: $LEADER_LSN"
# Calculate LSN lag
# Shell arithmetic treats an empty or non-numeric operand such as "null" as 0,
# which would report zero lag and let the switchover through. Require plain
# decimal numbers before doing any maths.
if [[ ! "$LEADER_LSN" =~ ^[0-9]+$ || ! "$TARGET_LSN" =~ ^[0-9]+$ ]]; then
  echo "Error: Could not read numeric LSNs (leader: '${LEADER_LSN}', target: '${TARGET_LSN}'). Switchover aborted." >&2
  exit 1
fi
# 10# forces base 10 so a value with leading zeros is not parsed as octal.
LSN_LAG=$((10#$LEADER_LSN - 10#$TARGET_LSN))
# The two positions are sampled at slightly different times; a replica that
# appears ahead of the leader is simply not lagging.
if (( LSN_LAG < 0 )); then
  LSN_LAG=0
fi
echo "LSN Lag: $LSN_LAG bytes"
# Validate RPO threshold
if (( LSN_LAG > MAX_ALLOWED_LAG )); then
  echo "Error: Replica is lagging by more than $MAX_ALLOWED_LAG bytes. Switchover aborted." >&2
  exit 1
fi
# Initiate switchover
echo "Initiating switchover from $LEADER to $TARGET..."
# Build the request body with jq so that names containing quotes or
# backslashes are escaped into valid JSON.
SWITCHOVER_PAYLOAD=$(jq -n --arg leader "$LEADER" --arg candidate "$TARGET" \
  '{leader: $leader, candidate: $candidate}')
# -w appends the HTTP status code on a line of its own after the body. Patroni
# reports the outcome of a switchover through that status code with a plain
# text message, so the code is the authoritative result.
SWITCHOVER_RESPONSE=$(curl -s -X POST "${PATRONI_API}/switchover" -H "Content-Type: application/json" \
  -d "$SWITCHOVER_PAYLOAD" -w '\n%{http_code}')
SWITCHOVER_CURL_STATUS=$?
SWITCHOVER_HTTP_CODE=${SWITCHOVER_RESPONSE##*$'\n'}
if [[ "$SWITCHOVER_RESPONSE" == *$'\n'* ]]; then
  SWITCHOVER_BODY=${SWITCHOVER_RESPONSE%$'\n'*}
else
  SWITCHOVER_BODY=""
fi
# Check result
if (( SWITCHOVER_CURL_STATUS != 0 )); then
  echo "Switchover failed: request to ${PATRONI_API}/switchover did not complete (curl exit ${SWITCHOVER_CURL_STATUS})" >&2
  exit 1
fi
if [[ ! "$SWITCHOVER_HTTP_CODE" =~ ^2[0-9][0-9]$ ]]; then
  echo "Switchover failed (HTTP ${SWITCHOVER_HTTP_CODE}): ${SWITCHOVER_BODY}" >&2
  exit 1
fi
# Kept for responses that do carry a JSON body with an "error" member; jq
# exits non-zero on plain text, so this is skipped for text replies.
if jq -e '.error' <<<"$SWITCHOVER_BODY" &>/dev/null; then
  echo "Switchover failed: $(jq -r '.error' <<<"$SWITCHOVER_BODY")" >&2
  exit 1
fi
# Relay the server's own message when there is one.
if [[ -n "$SWITCHOVER_BODY" ]]; then
  printf '%s\n' "$SWITCHOVER_BODY"
fi
echo "Switchover successful! $TARGET is now the new leader."





# 0 comments:
# Post a Comment