Scripts to build container, download model, and serve Qwen3.5-35B-A3B via vLLM with OpenAI-compatible API on port 7080. Configured for 2x NVIDIA L40S GPUs with tensor parallelism, supporting ~15 concurrent students. Made-with: Cursor
32 lines
853 B
Bash
Executable File
32 lines
853 B
Bash
Executable File
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 05_stop_server.sh
# Gracefully stops the background vLLM server.
#
# Reads the server PID from logs/vllm_server.pid (next to this script),
# sends SIGTERM, polls until the process exits, and escalates to SIGKILL
# only if it is still alive once the grace period has elapsed.
#
# Environment:
#   STOP_TIMEOUT  Whole seconds to wait after SIGTERM before sending
#                 SIGKILL (default: 10).
#
# Exit status:
#   0  server stopped, or it was not running
#   1  PID file content is invalid, or the process survived SIGKILL
#   2  STOP_TIMEOUT is not a non-negative integer
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PID_FILE="${SCRIPT_DIR}/logs/vllm_server.pid"

#######################################
# Reports whether a process can be signalled.
# Arguments: $1 - process ID
# Returns:   0 if `kill -0` succeeds, non-zero otherwise.
#######################################
is_running() {
  kill -0 "$1" 2>/dev/null
}

#######################################
# Polls once per second until a process is gone or the timeout elapses.
# Arguments: $1 - process ID
#            $2 - maximum number of seconds to wait
# Returns:   0 if the process exited in time, 1 if it is still alive.
#######################################
wait_for_exit() {
  local pid=$1
  local timeout=$2
  local waited=0

  while is_running "$pid"; do
    if (( waited >= timeout )); then
      return 1
    fi
    sleep 1
    waited=$(( waited + 1 ))
  done
  return 0
}

#######################################
# Stops the process recorded in a PID file.
# Arguments: $1 - path to the PID file
#            $2 - grace period in seconds before SIGKILL (default: 10)
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 if the process is stopped or was not running,
#            1 on an invalid PID or a process that survives SIGKILL,
#            2 on an invalid grace period.
#######################################
stop_server() {
  local pid_file=$1
  local timeout=${2:-10}
  local pid=""

  if [[ ! "$timeout" =~ ^[0-9]+$ ]]; then
    printf 'Invalid timeout "%s": expected a non-negative integer.\n' \
      "$timeout" >&2
    return 2
  fi
  # Force base 10 so a value such as "08" is not parsed as octal.
  timeout=$(( 10#$timeout ))

  if [[ ! -f "$pid_file" ]]; then
    echo "No PID file found. Server may not be running."
    return 0
  fi

  # `read` trims surrounding whitespace; it returns non-zero when the file
  # is empty or lacks a trailing newline, which must not abort the script.
  read -r pid < "$pid_file" || true

  # Accept positive integers only. An empty value, 0 or a negative number
  # must never reach `kill`: `kill 0` signals the caller's whole process
  # group, and negative values address process groups.
  if [[ ! "$pid" =~ ^[1-9][0-9]*$ ]]; then
    printf 'Invalid PID "%s" in %s. Removing PID file.\n' \
      "$pid" "$pid_file" >&2
    rm -f -- "$pid_file"
    return 1
  fi

  if ! is_running "$pid"; then
    echo "Server process (PID: ${pid}) is not running."
    rm -f -- "$pid_file"
    return 0
  fi

  echo "Stopping server (PID: ${pid})..."
  # The process may exit between the liveness check and this call; that
  # race is harmless and must not trip `set -e`.
  kill "$pid" 2>/dev/null || true

  if ! wait_for_exit "$pid" "$timeout"; then
    echo "Process still alive, sending SIGKILL..."
    kill -9 "$pid" 2>/dev/null || true
    if ! wait_for_exit "$pid" 5; then
      # Keep the PID file so a later run can retry against the same PID.
      echo "Failed to stop server (PID: ${pid})." >&2
      return 1
    fi
  fi

  echo "Server stopped."
  rm -f -- "$pid_file"
}

main() {
  stop_server "$PID_FILE" "${STOP_TIMEOUT:-10}"
}

main "$@"