Scripts to build container, download model, and serve Qwen3.5-35B-A3B via vLLM with OpenAI-compatible API on port 7080. Configured for 2x NVIDIA L40S GPUs with tensor parallelism, supporting ~15 concurrent students. Made-with: Cursor
51 lines
1.5 KiB
Bash
Executable File
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 02_download_model.sh
#
# Downloads the Qwen3.5-35B-A3B weights from Hugging Face using
# huggingface-cli INSIDE the Apptainer container, so no host-side
# Python / huggingface_hub installation is needed.
#
# Prerequisites:
# - Container built via 01_build_container.sh
#
# Usage:
#   bash 02_download_model.sh [TARGET_DIR]
#     TARGET_DIR  optional destination directory
#                 (default: $HOME/models/Qwen3.5-35B-A3B)
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
readonly SCRIPT_DIR
readonly SIF_FILE="${SCRIPT_DIR}/vllm_qwen.sif"

readonly MODEL_ID="Qwen/Qwen3.5-35B-A3B"
TARGET_DIR="${1:-$HOME/models/Qwen3.5-35B-A3B}"
readonly TARGET_DIR
readonly HF_CACHE_DIR="${HOME}/.cache/huggingface"

# Fail early, to stderr, if the container image hasn't been built yet.
if [[ ! -f "$SIF_FILE" ]]; then
  echo "ERROR: Container image not found at ${SIF_FILE}" >&2
  echo " Run 01_build_container.sh first." >&2
  exit 1
fi

echo "=== Downloading ${MODEL_ID} to ${TARGET_DIR} ==="
echo " Using huggingface-cli inside the container."
echo ""

mkdir -p "$TARGET_DIR" "$HF_CACHE_DIR"

# --writable-tmpfs : lets in-container tooling write scratch files outside
#                    the bind mounts (the image itself stays read-only).
# Bind the parent of TARGET_DIR so the download path resolves in-container,
# and bind the HF cache so interrupted downloads can resume across runs.
# NOTE(review): --local-dir-use-symlinks is deprecated (a no-op) in recent
# huggingface_hub releases; kept for compatibility with the pinned container.
apptainer exec \
  --writable-tmpfs \
  --bind "$(dirname "$TARGET_DIR"):$(dirname "$TARGET_DIR")" \
  --bind "${HF_CACHE_DIR}:${HF_CACHE_DIR}" \
  --env HF_HOME="${HF_CACHE_DIR}" \
  --env HF_HUB_CACHE="${HF_CACHE_DIR}/hub" \
  --env XDG_CACHE_HOME="${HOME}/.cache" \
  "$SIF_FILE" \
  huggingface-cli download "$MODEL_ID" \
    --local-dir "$TARGET_DIR" \
    --local-dir-use-symlinks False

echo ""
echo "=== Download complete ==="
echo "Model stored at: ${TARGET_DIR}"
echo "Total size:"
du -sh "$TARGET_DIR"