Scripts to build container, download model, and serve Qwen3.5-35B-A3B via vLLM with OpenAI-compatible API on port 7080. Configured for 2x NVIDIA L40S GPUs with tensor parallelism, supporting ~15 concurrent students. Made-with: Cursor
33 lines
978 B
Bash
Executable File
33 lines
978 B
Bash
Executable File
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 01_build_container.sh
# Builds the Apptainer SIF image for vLLM inference.
# This must be run FIRST — everything else runs inside the container.
#
# Usage:
#   bash 01_build_container.sh
#
# Inputs:
#   vllm_qwen.def   Apptainer definition file, expected next to this script.
# Outputs:
#   vllm_qwen.sif   Built image, written next to this script.
#
# Exit status:
#   0  build succeeded, or an existing image was kept (user declined rebuild)
#   1  a prerequisite is missing (apptainer binary or definition file)
#   *  any non-zero status from `apptainer build` is propagated by `set -e`
# ------------------------------------------------------------------
set -euo pipefail

# Print an error message to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Resolve the directory holding this script so the .def/.sif paths do not
# depend on the caller's working directory.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly DEF_FILE="${SCRIPT_DIR}/vllm_qwen.def"
readonly SIF_FILE="${SCRIPT_DIR}/vllm_qwen.sif"

# Fail fast on missing prerequisites, before prompting the user or printing
# the build banner.
command -v apptainer >/dev/null 2>&1 || die "apptainer not found on PATH."
[[ -f "$DEF_FILE" ]] || die "definition file not found: ${DEF_FILE}"

# Options for `apptainer build`, kept in an array so flags can be appended
# conditionally without string concatenation.
build_args=(--nv)

if [[ -f "$SIF_FILE" ]]; then
  echo "WARNING: ${SIF_FILE} already exists."
  confirm=""
  # -r keeps backslashes literal. `|| true` makes EOF on stdin (e.g. a
  # non-interactive run) fall through to the default answer "N" instead of
  # tripping `set -e`.
  read -r -p "Rebuild? [y/N] " confirm || true
  [[ "$confirm" =~ ^[Yy]$ ]] || exit 0
  # The user already agreed to overwrite; --force stops apptainer from
  # asking the same question again (or failing when no TTY is attached).
  build_args+=(--force)
fi

echo "=== Building Apptainer image from ${DEF_FILE} ==="
echo " This will pull the vLLM Docker image and convert it."
echo " Estimated time: 10-20 minutes depending on network speed."
echo ""

apptainer build "${build_args[@]}" "$SIF_FILE" "$DEF_FILE"

echo ""
echo "=== Build complete ==="
echo "Image: ${SIF_FILE}"
ls -lh -- "$SIF_FILE"