Scripts to build the container, download the model, and serve Qwen3.5-35B-A3B via vLLM with an OpenAI-compatible API on port 7080. Configured for 2x NVIDIA L40S GPUs with tensor parallelism, supporting ~15 concurrent students. Made-with: Cursor
54 lines
1.5 KiB
Bash
Executable File
54 lines
1.5 KiB
Bash
Executable File
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 04_start_server_background.sh
# Launches the vLLM server in the background with logging.
# Useful for long-running deployments or running inside tmux/screen.
#
# Usage:
#   bash 04_start_server_background.sh
#
# Files (relative to the directory containing this script, not the
# caller's working directory):
#   logs/vllm_server_<timestamp>.log   combined stdout/stderr of the launcher
#   logs/vllm_server.pid               PID of the backgrounded launcher
#
# Exit status:
#   0  the launcher is still alive after the startup grace period
#   1  a server is already running, 03_start_server.sh is missing,
#      or the launcher exited during the grace period
# ------------------------------------------------------------------
set -euo pipefail

# Assignment kept separate from `readonly` so a failing `cd` is not masked.
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
readonly SCRIPT_DIR
readonly LOG_DIR="${SCRIPT_DIR}/logs"
readonly PID_FILE="${LOG_DIR}/vllm_server.pid"
readonly SERVER_SCRIPT="${SCRIPT_DIR}/03_start_server.sh"
# How long to wait before checking that the launcher survived start-up.
readonly STARTUP_GRACE_SECONDS=3

mkdir -p -- "$LOG_DIR"

TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
readonly TIMESTAMP
readonly LOG_FILE="${LOG_DIR}/vllm_server_${TIMESTAMP}.log"

# Refuse to start a second instance while the recorded PID is still alive.
if [[ -f "$PID_FILE" ]]; then
  OLD_PID="$(cat -- "$PID_FILE")"
  # Only a positive integer is a usable PID: an empty value is an error, and
  # 0 or a negative number would make `kill` address a process group instead.
  if [[ "$OLD_PID" =~ ^[1-9][0-9]*$ ]] && kill -0 "$OLD_PID" 2>/dev/null; then
    echo "Server already running with PID ${OLD_PID}" >&2
    echo "Stop it first: bash 05_stop_server.sh" >&2
    exit 1
  fi
  # The recorded process is gone (or the file is corrupt): drop the stale file.
  rm -f -- "$PID_FILE"
fi

# Fail fast with a clear message instead of discovering this after the sleep.
if [[ ! -f "$SERVER_SCRIPT" ]]; then
  echo "ERROR: launcher script not found: ${SERVER_SCRIPT}" >&2
  exit 1
fi

echo "Starting vLLM server in background..."
echo "Log file: ${LOG_FILE}"

# nohup keeps the launcher alive after the invoking terminal closes.
nohup bash "$SERVER_SCRIPT" > "$LOG_FILE" 2>&1 &
# NOTE(review): this is the PID of the process running 03_start_server.sh;
# whether it is also the vLLM process depends on that script — confirm.
SERVER_PID=$!
echo "$SERVER_PID" > "$PID_FILE"

echo "Server PID: ${SERVER_PID}"
echo ""
echo "Monitor logs: tail -f ${LOG_FILE}"
echo "Stop server: bash 05_stop_server.sh"
echo ""

# Catch immediate start-up failures; this does not wait for the model to load.
sleep "$STARTUP_GRACE_SECONDS"
if kill -0 "$SERVER_PID" 2>/dev/null; then
  echo "Server process is running. Waiting for model to load..."
  echo "(This can take several minutes for Qwen3.5-35B-A3B)"
else
  # Do not leave a PID file pointing at a dead (and possibly reused) PID.
  rm -f -- "$PID_FILE"
  echo "ERROR: Server process exited. Check logs:" >&2
  # `|| true`: a tail failure must not replace the exit status below.
  tail -n 20 -- "$LOG_FILE" >&2 || true
  exit 1
fi