LLM_Inferenz_Server_1/02_download_model.sh
herzogflorian 076001b07f Add vLLM inference setup for Qwen3.5-35B-A3B on Apptainer
Scripts to build container, download model, and serve Qwen3.5-35B-A3B
via vLLM with OpenAI-compatible API on port 7080. Configured for 2x
NVIDIA L40S GPUs with tensor parallelism, supporting ~15 concurrent
students.

Made-with: Cursor
2026-03-02 14:43:39 +01:00

51 lines
1.5 KiB
Bash
Executable File

#!/usr/bin/env bash
# ------------------------------------------------------------------
# 02_download_model.sh
#
# Downloads the Qwen3.5-35B-A3B weights from Hugging Face using
# huggingface-cli INSIDE the Apptainer container, so no Python/HF
# tooling is needed on the host.
#
# Prerequisites:
#   - Container image built via 01_build_container.sh
#   - Optional: export HF_TOKEN for gated/private repositories
#
# Usage:
#   bash 02_download_model.sh [TARGET_DIR]
#     TARGET_DIR  destination for the weights
#                 (default: $HOME/models/Qwen3.5-35B-A3B)
# ------------------------------------------------------------------
set -euo pipefail

# Print an error to stderr and abort.
die() { printf 'ERROR: %s\n' "$*" >&2; exit 1; }

# BASH_SOURCE is robust even if the script is sourced rather than run.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly SIF_FILE="${SCRIPT_DIR}/vllm_qwen.sif"
readonly MODEL_ID="Qwen/Qwen3.5-35B-A3B"
TARGET_DIR="${1:-$HOME/models/Qwen3.5-35B-A3B}"
readonly TARGET_DIR
readonly HF_CACHE_DIR="${HOME}/.cache/huggingface"

# Fail early with actionable messages instead of letting the
# download command produce a less obvious error later.
command -v apptainer >/dev/null 2>&1 || die "apptainer not found in PATH."

if [[ ! -f "$SIF_FILE" ]]; then
  die "Container image not found at ${SIF_FILE} — run 01_build_container.sh first."
fi

echo "=== Downloading ${MODEL_ID} to ${TARGET_DIR} ==="
echo "    Using huggingface-cli inside the container."
echo ""

# Both directories must exist on the host before they can be
# bind-mounted into the container.
mkdir -p -- "$TARGET_DIR" "$HF_CACHE_DIR"

# Forward an HF token, if present, so gated/private repos work.
extra_env=()
if [[ -n "${HF_TOKEN:-}" ]]; then
  extra_env+=(--env HF_TOKEN="${HF_TOKEN}")
fi

# NOTE: --local-dir-use-symlinks is deprecated (a no-op) on newer
# huggingface_hub versions; kept for compatibility with older ones,
# where it forces real files instead of symlinks into the cache.
# The ${arr[@]+...} expansion keeps an empty array safe under set -u
# on bash < 4.4.
apptainer exec \
  --writable-tmpfs \
  --bind "$(dirname -- "$TARGET_DIR"):$(dirname -- "$TARGET_DIR")" \
  --bind "${HF_CACHE_DIR}:${HF_CACHE_DIR}" \
  --env HF_HOME="${HF_CACHE_DIR}" \
  --env HF_HUB_CACHE="${HF_CACHE_DIR}/hub" \
  --env XDG_CACHE_HOME="${HOME}/.cache" \
  ${extra_env[@]+"${extra_env[@]}"} \
  "$SIF_FILE" \
  huggingface-cli download "$MODEL_ID" \
    --local-dir "$TARGET_DIR" \
    --local-dir-use-symlinks False

echo ""
echo "=== Download complete ==="
echo "Model stored at: ${TARGET_DIR}"
echo "Total size:"
du -sh -- "$TARGET_DIR"