LLM_Inferenz_Server_1/test_server.py
herzogflorian 9e1e0c0751 Add Streamlit chat app, update container to vLLM nightly
- Add app.py: Streamlit UI with chat and file editor tabs
- Add requirements.txt: streamlit + openai dependencies
- Update vllm_qwen.def: use nightly image for Qwen3.5 support
- Update README.md: reflect 35B-A3B model, correct script names
- Update STUDENT_GUIDE.md: add app usage and thinking mode docs
- Update .gitignore: exclude .venv/ and workspace/

Made-with: Cursor
2026-03-02 16:30:04 +01:00

71 lines
2.2 KiB
Python

"""
Quick test script to verify the vLLM server is running and responding.
Usage:
pip install openai
python test_server.py [--host HOST] [--port PORT] [--api-key KEY]
"""
import argparse
import sys
from openai import OpenAI
def _list_models(client) -> None:
    """Connectivity check: list the models the server exposes, or exit(1)."""
    print("\n--- Available Models ---")
    try:
        models = client.models.list()
    except Exception as e:
        # Any failure here means the server is unreachable; no point running
        # the remaining tests, so bail out with a non-zero exit code.
        print(f"ERROR: Cannot connect to server: {e}")
        sys.exit(1)
    else:
        for m in models.data:
            print(f" {m.id}")


def _test_chat(client, model: str) -> None:
    """Non-streaming chat completion test; prints the response and token usage."""
    print("\n--- Test Chat Completion ---")
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": "Create a latex document that derives and explains the principle component analysis (pca). Make a self contain document with introduction, derivation, examples of applications. This is for computer science undergraduate class."}
            ],
            max_tokens=16384,
            temperature=0.7,
        )
    except Exception as e:
        print(f"ERROR: Chat completion failed: {e}")
        sys.exit(1)
    print(f" Response: {response.choices[0].message.content}")
    print(f" Tokens: prompt={response.usage.prompt_tokens}, "
          f"completion={response.usage.completion_tokens}")


def _test_streaming(client, model: str) -> None:
    """Streaming chat completion test; prints content deltas as they arrive."""
    print("\n--- Test Streaming ---")
    try:
        stream = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": "Count from 1 to 5."}
            ],
            max_tokens=16384,
            temperature=0.7,
            stream=True,
        )
        print(" Response: ", end="")
        for chunk in stream:
            # Deltas may carry no content (e.g. role-only or finish chunks).
            if chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="", flush=True)
    except Exception as e:
        print(f"\nERROR: Streaming completion failed: {e}")
        sys.exit(1)
    print("\n")


def main() -> None:
    """Run smoke tests against a vLLM OpenAI-compatible server.

    Exercises, in order: model listing (doubles as a connectivity check),
    a plain chat completion, and a streaming chat completion. Exits with
    status 1 on the first failure.
    """
    parser = argparse.ArgumentParser(description="Test vLLM inference server")
    parser.add_argument("--host", default="localhost", help="Server hostname")
    parser.add_argument("--port", default=7080, type=int, help="Server port")
    parser.add_argument("--api-key", default="EMPTY", help="API key")
    # Model is now configurable; the default preserves the previous
    # hard-coded value, so existing invocations behave identically.
    parser.add_argument("--model", default="qwen3.5-35b-a3b",
                        help="Model name to query")
    args = parser.parse_args()

    base_url = f"http://{args.host}:{args.port}/v1"
    client = OpenAI(base_url=base_url, api_key=args.api_key)
    print(f"Connecting to {base_url} ...")

    _list_models(client)
    _test_chat(client, args.model)
    _test_streaming(client, args.model)
    print("All tests passed!")


if __name__ == "__main__":
    main()