LLM_Inferenz_Server_1/04_start_server_background.sh
herzogflorian 076001b07f Add vLLM inference setup for Qwen3.5-35B-A3B on Apptainer
Scripts to build container, download model, and serve Qwen3.5-35B-A3B
via vLLM with OpenAI-compatible API on port 7080. Configured for 2x
NVIDIA L40S GPUs with tensor parallelism, supporting ~15 concurrent
students.

Made-with: Cursor
2026-03-02 14:43:39 +01:00

54 lines
1.5 KiB
Bash
Executable File

#!/usr/bin/env bash
# ------------------------------------------------------------------
# 04_start_server_background.sh
# Launches the vLLM server in the background with logging.
# Useful for long-running deployments or running inside tmux/screen.
#
# Usage:
#   bash 04_start_server_background.sh
#
# Logs are written to: ./logs/vllm_server_<timestamp>.log
# PID is written to:   ./logs/vllm_server.pid
#
# Exit codes: 0 = launched, 1 = already running / launcher missing /
#             server process died within the startup grace period.
# ------------------------------------------------------------------
set -euo pipefail

# Resolve paths relative to this script, not the caller's CWD.
# ${BASH_SOURCE[0]} is more robust than $0 (works when sourced).
readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly LOG_DIR="${SCRIPT_DIR}/logs"
mkdir -p "$LOG_DIR"

readonly TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
readonly LOG_FILE="${LOG_DIR}/vllm_server_${TIMESTAMP}.log"
readonly PID_FILE="${LOG_DIR}/vllm_server.pid"
readonly LAUNCHER="${SCRIPT_DIR}/03_start_server.sh"

# Refuse to start a second instance; clean up a stale PID file so a
# failed launch doesn't leave a dead PID pointing at nothing.
if [[ -f "$PID_FILE" ]]; then
  OLD_PID=$(cat "$PID_FILE")
  if kill -0 "$OLD_PID" 2>/dev/null; then
    echo "Server already running with PID ${OLD_PID}"
    echo "Stop it first: bash 05_stop_server.sh"
    exit 1
  fi
  rm -f -- "$PID_FILE"
fi

# Fail fast with a clear message instead of discovering the missing
# launcher only after the startup grace period below.
if [[ ! -r "$LAUNCHER" ]]; then
  echo "ERROR: launcher script not found or unreadable: ${LAUNCHER}" >&2
  exit 1
fi

echo "Starting vLLM server in background..."
echo "Log file: ${LOG_FILE}"

# nohup detaches from the controlling terminal so the server survives
# logout; stdout+stderr go to the timestamped log file.
nohup bash "$LAUNCHER" > "$LOG_FILE" 2>&1 &
SERVER_PID=$!
echo "$SERVER_PID" > "$PID_FILE"

echo "Server PID: ${SERVER_PID}"
echo ""
echo "Monitor logs: tail -f ${LOG_FILE}"
echo "Stop server: bash 05_stop_server.sh"
echo ""

# Brief grace period: catch immediate failures (bad config, missing
# model, port in use) before declaring success.
sleep 3
if kill -0 "$SERVER_PID" 2>/dev/null; then
  echo "Server process is running. Waiting for model to load..."
  echo "(This can take several minutes for Qwen3.5-35B-A3B)"
else
  echo "ERROR: Server process exited. Check logs:"
  tail -20 "$LOG_FILE"
  exit 1
fi