#!/usr/bin/env bash
# ------------------------------------------------------------------
# 17_stop_replicas_122b.sh
# Stops the load balancer and both 122B replicas started by
# 15_start_replicas_122b.sh.
#
# PID files are read from the logs/ directory next to this script.
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
LOG_DIR="${SCRIPT_DIR}/logs"

#######################################
# Stop the process recorded in a PID file: SIGTERM first, then SIGKILL
# if it is still alive after the grace period. The PID file is removed
# afterwards (also when its content is invalid or the process is gone).
# Arguments:
#   $1 - human-readable name used in messages
#   $2 - path to the PID file
#   $3 - optional grace period in seconds before SIGKILL (default: 5)
# Outputs:
#   progress messages on stdout; invalid-PID warning on stderr
# Returns:
#   0 in all handled cases
#######################################
stop_one() {
  local name="$1" pidfile="$2" grace="${3:-5}"

  if [[ ! -f "$pidfile" ]]; then
    echo "${name}: no PID file, skipping."
    return 0
  fi

  # Fall back to the default if the grace period is not a plain number.
  if [[ ! "$grace" =~ ^[0-9]+$ ]]; then
    grace=5
  fi
  grace=$((10#$grace))

  # An unreadable file must not abort the whole script under `set -e`;
  # treat it like an invalid PID so the remaining services still stop.
  local pid
  pid=$(<"$pidfile") 2>/dev/null || pid=""
  pid="${pid//[[:space:]]/}"

  # Only a positive integer is a safe kill target: `kill 0` signals our
  # own process group and negative values address whole groups.
  if [[ ! "$pid" =~ ^[1-9][0-9]*$ ]]; then
    echo "${name}: invalid PID '${pid}' in ${pidfile}, removing stale PID file." >&2
    rm -f -- "$pidfile"
    return 0
  fi

  if kill -0 "$pid" 2>/dev/null; then
    echo "Stopping ${name} (PID ${pid})..."
    # Best effort: the process may exit between the check and the signal.
    kill "$pid" 2>/dev/null || true

    local waited
    for ((waited = 0; waited < grace; waited++)); do
      kill -0 "$pid" 2>/dev/null || break
      sleep 1
    done

    if kill -0 "$pid" 2>/dev/null; then
      echo "  still alive, SIGKILL..."
      kill -9 "$pid" 2>/dev/null || true
    fi
    echo "  ${name} stopped."
  else
    echo "${name} (PID ${pid}) not running."
  fi

  rm -f -- "$pidfile"
}

# Stop LB first so no new requests get routed to dying replicas.
stop_one "load balancer" "${LOG_DIR}/vllm_lb.pid"
stop_one "replica a" "${LOG_DIR}/vllm_replica_a.pid"
stop_one "replica b" "${LOG_DIR}/vllm_replica_b.pid"

echo ""
echo "All replicas and load balancer stopped."
echo "Note: vLLM worker subprocesses may take a few seconds to release GPU memory."
echo "Verify with: nvidia-smi"