tools: add ppf-service
Status, start, stop, restart for PPF containers. Status includes compose ps, master health check, and worker API query.
This commit is contained in:
183
tools/ppf-service
Executable file
183
tools/ppf-service
Executable file
@@ -0,0 +1,183 @@
|
||||
#!/bin/bash
|
||||
# ppf-service -- manage PPF containers
|
||||
#
|
||||
# Usage:
|
||||
# ppf-service <command> [nodes...]
|
||||
#
|
||||
# Commands: status, start, stop, restart
|
||||
|
||||
# Abort on any unhandled command failure and on use of an unset variable.
set -eu

# Absolute directory containing this script.
# CDPATH is cleared for the cd: when CDPATH is set and used for the lookup,
# cd prints the directory it entered on stdout, and that text would end up
# inside SCRIPT_DIR. The `--` guards protect paths that begin with a dash.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"

# Base URL of the HTTP API queried (from the master host) by the health and
# worker-API status helpers.
ODIN_URL="http://10.200.1.250:8081"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Usage
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Print the help text and terminate the script.
# Arguments: $1 - exit status (optional, default 0). With a non-zero status
#                 the text is written to stderr, so a misuse message never
#                 pollutes captured stdout.
# Outputs:   help text on stdout (status 0) or stderr (non-zero status)
# Returns:   never returns; exits with the requested status
#######################################
usage() {
  local status="${1:-0}"

  # The function exits right after printing, so redirecting the shell's
  # stdout here has no lasting effect.
  if [ "$status" -ne 0 ]; then
    exec >&2
  fi

  # Quoted delimiter: the text is literal, nothing in it is ever expanded.
  cat <<'EOF'
Usage: ppf-service <command> [nodes...]

Manage PPF containers on remote nodes.

Commands:
  status      show container state + health (default nodes: all)
  start       start containers (compose up -d)
  stop        stop containers (compose stop)
  restart     restart containers (compose restart)

Nodes:
  all         odin + all workers (default)
  workers     cassius, edge, sentinel
  master      odin
  <hostname>  specific host(s)

Options:
  --help      show this help
  --version   show version

Examples:
  ppf-service status
  ppf-service restart workers
  ppf-service stop cassius edge
  ppf-service start odin
EOF
  exit "$status"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Status helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Query the /health endpoint and log a one-line verdict.
# The curl call is executed on $MASTER through ansible_cmd (raw module).
# Globals:   MASTER (read), ODIN_URL (read)
# Arguments: none
# Outputs:   exactly one line via log_ok, log_err or log_warn
#######################################
show_health() {
  local result

  # Best effort: a failing ansible run must not abort the script under
  # `set -e`; it simply leaves $result empty.
  result=$(ansible_cmd "$MASTER" -m raw -a \
    "curl -sf --max-time 5 ${ODIN_URL}/health 2>/dev/null || echo UNREACHABLE" \
    2>/dev/null) || true

  if [ -z "$result" ]; then
    # No output at all means the query never produced an answer.
    log_err "master health: unreachable"
  elif grep -Eqiw 'ok|healthy' <<<"$result"; then
    # Whole-word match: "unhealthy", "broken" or "token" must not count as
    # a healthy reply.
    log_ok "master health: ok"
  elif grep -qi 'UNREACHABLE' <<<"$result"; then
    log_err "master health: unreachable"
  else
    log_warn "master health: $result"
  fi
}
|
||||
|
||||
#######################################
# Fetch /api/workers and print the reply, trimmed.
# The curl call is executed on $MASTER through ansible_cmd (raw module).
# Blank lines and lines starting with an ASCII capital letter are dropped,
# and at most 20 lines are shown.
# Globals:   MASTER (read), ODIN_URL (read)
# Arguments: none
# Outputs:   a log_info heading plus one log_dim line per kept line;
#            nothing at all when no line survives the filter
#######################################
show_workers_api() {
  local result data line

  # Best effort: on failure the remote side prints '{}', and a failing
  # ansible run must not abort the script under `set -e`.
  result=$(ansible_cmd "$MASTER" -m raw -a \
    "curl -sf --max-time 5 ${ODIN_URL}/api/workers 2>/dev/null || echo '{}'" \
    2>/dev/null) || true

  # POSIX character class instead of the GNU-only \s, and LC_ALL=C so that
  # [A-Z] means exactly the ASCII capitals regardless of the user's locale.
  data=$(printf '%s\n' "$result" \
    | grep -v '^[[:space:]]*$' \
    | LC_ALL=C grep -v '^[A-Z]' \
    | head -n 20)

  if [ -n "$data" ]; then
    log_info "Worker API response:"
    while IFS= read -r line; do
      log_dim "$line"
    done <<<"$data"
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Commands
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Show the container state of every host and, when the master is among
# the targets, its health and worker-API summary.
# Arguments: $1 - whitespace-separated list of hosts
# Outputs:   per host one verdict line (log_ok / log_err / log_warn)
#            followed by the non-blank lines of `compose ps` via log_dim;
#            optionally a "Master health" section
#######################################
cmd_status() {
  local hosts="$1"
  local host output line h

  section "Container status"

  # $hosts is a whitespace-separated list; word splitting is intentional.
  # shellcheck disable=SC2086
  for host in $hosts; do
    # Best effort: a failing query leaves $output empty and the host is
    # reported as "unknown" instead of aborting under `set -e`.
    output=$(compose_cmd "$host" "ps" 2>/dev/null) || true

    # Whole-word match, so a container merely *named* e.g. "ppf-backup" or
    # "startup" is not mistaken for one that is up.
    if grep -Eqiw 'up|running' <<<"$output"; then
      log_ok "$host"
    elif grep -qi 'exit' <<<"$output"; then
      log_err "$host (exited)"
    else
      log_warn "$host (unknown)"
    fi

    while IFS= read -r line; do
      # Skip blank / whitespace-only lines.
      if [[ ! "$line" =~ ^[[:space:]]*$ ]]; then
        log_dim "$line"
      fi
    done <<<"$output"
  done

  # Show health/worker info once if the master is in the target list.
  # shellcheck disable=SC2086
  for h in $hosts; do
    if is_master "$h"; then
      section "Master health"
      show_health
      show_workers_api
      break
    fi
  done
}
|
||||
|
||||
#######################################
# Start the containers on every host (compose "up -d").
# Arguments: $1 - whitespace-separated list of hosts
# Outputs:   one log_ok / log_err line per host; on failure the captured
#            compose output follows via log_dim so the cause is visible
# Returns:   0 always - failures are reported, not propagated
#######################################
cmd_start() {
  local hosts="$1"
  local host output line

  section "Starting containers"

  # $hosts is a whitespace-separated list; word splitting is intentional.
  # shellcheck disable=SC2086
  for host in $hosts; do
    # if/else instead of `a && b || c`, which would also run the error
    # branch whenever the success logger itself returned non-zero.
    if output=$(compose_cmd "$host" "up -d" 2>&1); then
      log_ok "$host started"
    else
      log_err "$host start failed"
      while IFS= read -r line; do
        if [ -n "$line" ]; then
          log_dim "$line"
        fi
      done <<<"$output"
    fi
  done

  return 0
}
|
||||
|
||||
#######################################
# Stop the containers on every host (compose "stop").
# Arguments: $1 - whitespace-separated list of hosts
# Outputs:   one log_ok / log_err line per host; on failure the captured
#            compose output follows via log_dim so the cause is visible
# Returns:   0 always - failures are reported, not propagated
#######################################
cmd_stop() {
  local hosts="$1"
  local host output line

  section "Stopping containers"

  # $hosts is a whitespace-separated list; word splitting is intentional.
  # shellcheck disable=SC2086
  for host in $hosts; do
    # if/else instead of `a && b || c`, which would also run the error
    # branch whenever the success logger itself returned non-zero.
    if output=$(compose_cmd "$host" "stop" 2>&1); then
      log_ok "$host stopped"
    else
      log_err "$host stop failed"
      while IFS= read -r line; do
        if [ -n "$line" ]; then
          log_dim "$line"
        fi
      done <<<"$output"
    fi
  done

  return 0
}
|
||||
|
||||
#######################################
# Restart the containers on every host (compose "restart").
# Arguments: $1 - whitespace-separated list of hosts
# Outputs:   one log_ok / log_err line per host; on failure the captured
#            compose output follows via log_dim so the cause is visible
# Returns:   0 always - failures are reported, not propagated
#######################################
cmd_restart() {
  local hosts="$1"
  local host output line

  section "Restarting containers"

  # $hosts is a whitespace-separated list; word splitting is intentional.
  # shellcheck disable=SC2086
  for host in $hosts; do
    # if/else instead of `a && b || c`, which would also run the error
    # branch whenever the success logger itself returned non-zero.
    if output=$(compose_cmd "$host" "restart" 2>&1); then
      log_ok "$host restarted"
    else
      log_err "$host restart failed"
      while IFS= read -r line; do
        if [ -n "$line" ]; then
          log_dim "$line"
        fi
      done <<<"$output"
    fi
  done

  return 0
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parse args
|
||||
# ---------------------------------------------------------------------------
|
||||
# Nothing to do without arguments: print the help (usage exits the script).
[ $# -eq 0 ] && usage

COMMAND=""
# Targets are collected in an array so they are never word-split or
# glob-expanded on their way to resolve_targets (a target such as `*` would
# otherwise be replaced by the file names in the current directory).
TARGETS=()

while [ $# -gt 0 ]; do
  case "$1" in
    --help | -h)
      usage
      ;;
    --version | -V)
      # Fallback keeps `set -u` from aborting if the version is not defined.
      echo "ppf-service ${PPF_TOOLS_VERSION:-unknown}"
      exit 0
      ;;
    status | start | stop | restart)
      # Only one command per invocation.
      [ -z "$COMMAND" ] || die "Multiple commands given"
      COMMAND="$1"
      ;;
    -*)
      die "Unknown option: $1"
      ;;
    *)
      TARGETS+=("$1")
      ;;
  esac
  shift
done

[ -n "$COMMAND" ] || die "No command given. Use: status, start, stop, restart"

# Default to every node when no target was named.
[ "${#TARGETS[@]}" -gt 0 ] || TARGETS=(all)
HOSTS=$(resolve_targets "${TARGETS[@]}")
[ -n "$HOSTS" ] || die "No valid targets"

case "$COMMAND" in
  status) cmd_status "$HOSTS" ;;
  start) cmd_start "$HOSTS" ;;
  stop) cmd_stop "$HOSTS" ;;
  restart) cmd_restart "$HOSTS" ;;
esac

printf "\n"
|
||||
Reference in New Issue
Block a user