- Add download script (10), start script (11), and background launcher (12) for the 122B FP8 model using all 4 GPUs with TP=4
- Both models share port 7080; only one runs at a time
- Update README with dual-model hardware table, switching workflow, and updated file overview
- Update STUDENT_GUIDE with both model names and discovery instructions

Made-with: Cursor
56 lines
1.6 KiB
Bash
Executable File
56 lines
1.6 KiB
Bash
Executable File
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 12_start_server_122b_background.sh
# Launches the vLLM server for Qwen3.5-122B-A10B-FP8 in background.
#
# NOTE: Only one model can run on port 7080 at a time.
#       Stop the current model first:  bash 05_stop_server.sh
#
# Usage:
#   bash 12_start_server_122b_background.sh
#
# Logs are written to: ./logs/vllm_server_122b_<timestamp>.log
# PID is written to:   ./logs/vllm_server.pid
#
# Exit status:
#   0  the launched process was still alive 3 seconds after start
#   1  a server recorded in the PID file is still running, the start
#      script is missing, or the launched process exited right away
# ------------------------------------------------------------------
set -euo pipefail

# Resolve the directory containing this script so it can be invoked from
# any working directory. `cd` output is discarded in case CDPATH is set.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
START_SCRIPT="${SCRIPT_DIR}/11_start_server_122b.sh"
LOG_DIR="${SCRIPT_DIR}/logs"
mkdir -p -- "$LOG_DIR"

TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
LOG_FILE="${LOG_DIR}/vllm_server_122b_${TIMESTAMP}.log"
PID_FILE="${LOG_DIR}/vllm_server.pid"

# Refuse to start while the PID file still points at a live process.
if [[ -f "$PID_FILE" ]]; then
  OLD_PID=""
  # `read` returns non-zero when the file lacks a trailing newline or is
  # empty; that must not abort the script under `set -e`.
  read -r OLD_PID < "$PID_FILE" || true
  # Only probe strictly positive integers: `kill -0 0` and negative values
  # address process groups and would report a server that does not exist.
  if [[ "$OLD_PID" =~ ^[1-9][0-9]*$ ]] && kill -0 "$OLD_PID" 2>/dev/null; then
    echo "A vLLM server is already running with PID ${OLD_PID}" >&2
    echo "Stop it first:  bash 05_stop_server.sh" >&2
    exit 1
  fi
fi

# Fail early with a clear message instead of discovering the problem in
# the log file after the start-up grace period.
if [[ ! -f "$START_SCRIPT" ]]; then
  echo "ERROR: Start script not found: ${START_SCRIPT}" >&2
  exit 1
fi

echo "Starting vLLM server (122B) in background..."
echo "Log file: ${LOG_FILE}"

# nohup keeps the server alive after the launching terminal closes;
# stdout and stderr are both captured in the timestamped log file.
nohup bash "$START_SCRIPT" > "$LOG_FILE" 2>&1 &
SERVER_PID=$!
printf '%s\n' "$SERVER_PID" > "$PID_FILE"

echo "Server PID: ${SERVER_PID}"
echo ""
echo "Monitor logs:  tail -f ${LOG_FILE}"
echo "Stop server:   bash 05_stop_server.sh"
echo ""

# Give the process a moment to fail fast (bad arguments, missing files, ...)
# before reporting success.
sleep 3

if kill -0 "$SERVER_PID" 2>/dev/null; then
  echo "Server process is running. Waiting for model to load..."
  echo "(This can take 5-10 minutes for Qwen3.5-122B-A10B-FP8)"
else
  # Drop the PID file: the recorded PID is dead and, if the number were
  # reused by another process, the check above would refuse to start.
  rm -f -- "$PID_FILE"
  echo "ERROR: Server process exited. Check logs:" >&2
  tail -n 20 -- "$LOG_FILE" >&2 || true
  exit 1
fi