Add API to repo
parent f80792d739
commit 52d087ce90

41 librarian/atlas-librarian/.prefectignore Normal file
@@ -0,0 +1,41 @@
# prefect artifacts
.prefectignore

# python artifacts
__pycache__/
*.py[cod]
*$py.class
*.egg-info/
*.egg

# Type checking artifacts
.mypy_cache/
.dmypy.json
dmypy.json
.pyre/

# IPython
profile_default/
ipython_config.py
*.ipynb_checkpoints/*

# Environments
.python-version
.env
.venv
env/
venv/

# MacOS
.DS_Store

# Dask
dask-worker-space/

# Editors
.idea/
.vscode/

# VCS
.git/
.hg/
14 librarian/atlas-librarian/README.md Normal file
@@ -0,0 +1,14 @@
# Usage

## Serving the API

Run the following command to start the server:

```bash
uv run uvicorn atlas_librarian.app:app --reload --port 8000
```
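
Once the server is up, a quick sanity check is to hit the root health-check endpoint (assuming the default port above; the expected response is the welcome JSON defined in `app.py`):

```bash
curl http://localhost:8000/
# → {"message":"Welcome to Atlas Librarian API"}
```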

## Deploy to Prefect

Run the following command to deploy the flow to Prefect:

```bash
uv run deploy
```
117 librarian/atlas-librarian/docs/api_specification.md Normal file
@@ -0,0 +1,117 @@
# Atlas Librarian API Specification (v0.1.0)

This document outlines the endpoints provided by the Atlas Librarian API Gateway.

**Base URL:** `/v1`

## Authentication

Currently, no authentication is required.

## Tasks

Long-running operations like crawling and downloading are handled via background tasks. You initiate a task via a POST request and then poll the status endpoint using the returned `task_id`.

### Task Object (`TaskInfo`)

```json
{
  "task_id": "string (uuid)",
  "name": "string | null (e.g., 'crawl', 'download')",
  "state": "string (queued | running | success | error)",
  "progress": "number | null (0.0 to 1.0, only present while running)",
  "detail": "string | null (details about state, especially errors)",
  "download_links": "array[string] | null (only for successful download tasks)"
}
```
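
A minimal polling sketch in Python (assumes the gateway is reachable at `http://localhost:8000` and that the `requests` package is installed; the paths follow this spec):

```python
import time

import requests

BASE = "http://localhost:8000/v1"

# Start a crawl task; the server answers 202 with a TaskInfo object.
task = requests.post(f"{BASE}/crawl", json={"no_cache": False}).json()

# Poll the status endpoint until the task leaves the queued/running states.
while task["state"] in ("queued", "running"):
    time.sleep(2)
    task = requests.get(f"{BASE}/crawl/status/{task['task_id']}").json()

print(task["state"], task.get("detail"))
```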

---

## Endpoints

### Crawler

**Tags:** `Crawler`

- **`POST /v1/crawl`**
  - **Summary:** Start Course Crawl
  - **Description:** Initiates a background task to crawl Moodle course information. `no_cache=true` forces a fresh crawl.
  - **Request Body:** `CrawlRequest`

    ```json
    {
      "no_cache": "boolean (optional, default: false)"
    }
    ```

  - **Response (202 Accepted):** `TaskInfo` (initial state: queued)
- **`GET /v1/crawl/status/{task_id}`**
  - **Summary:** Get Crawl Task Status
  - **Description:** Retrieves the current status (queued, running, success, error) and progress of a specific crawl task.
  - **Path Parameter:** `task_id` (string)
  - **Response (200 OK):** `TaskInfo`
  - **Response (404 Not Found):** If `task_id` does not exist.

### Downloader

**Tags:** `Downloader`

- **`POST /v1/download`**
  - **Summary:** Start Course Material Download
  - **Description:** Initiates a background task to download materials for the specified list of courses. An example request follows this list.
  - **Request Body:** `DownloadRequest`

    ```json
    {
      "courses": [
        {
          "module_id": "string",
          "term_id": "string"
        }
      ]
    }
    ```

  - **Response (202 Accepted):** `TaskInfo` (initial state: queued)
  - **Response (400 Bad Request):** If `courses` list is empty.
- **`GET /v1/download/status/{task_id}`**
  - **Summary:** Get Download Task Status
  - **Description:** Retrieves the current status (queued, running, success, error) and progress of a specific download task. Includes `download_links` on success.
  - **Path Parameter:** `task_id` (string)
  - **Response (200 OK):** `TaskInfo`
  - **Response (404 Not Found):** If `task_id` does not exist.
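
For example, starting a download for a single course (IDs are illustrative, taken from the sample data elsewhere in this commit):

```bash
curl -X POST http://localhost:8000/v1/download \
  -H "Content-Type: application/json" \
  -d '{"courses": [{"module_id": "18240", "term_id": "1745"}]}'
```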

### Metadata

**Tags:** `Metadata`

- **`GET /v1/terms`**
  - **Summary:** Get All Terms
  - **Description:** Retrieves a list of all available academic terms (e.g., 'HS24', 'FS25').
  - **Response (200 OK):** `List[string]`

    ```json
    ["HS24", "FS25"]
    ```

### Summaries

**Tags:** `Summaries`

- **`GET /v1/summaries/{term_id}/{course_id}`**
  - **Summary:** Get Course Summary
  - **Description:** Retrieves the generated summary content (in Markdown format) for a specific course within a specific term.
  - **Path Parameters:** `term_id` (string), `course_id` (string)
  - **Response (200 OK):** `SummaryResponse`

    ```json
    {
      "summary": "string (Markdown)"
    }
    ```

  - **Response (404 Not Found):** If the summary for the given course/term does not exist.
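
For example (illustrative term and course IDs from the sample data in this commit):

```bash
curl http://localhost:8000/v1/summaries/HS24/17525
```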

---

_Generated based on FastAPI application structure._
44 librarian/atlas-librarian/pyproject.toml Normal file
@@ -0,0 +1,44 @@
[project]
name = "atlas-librarian"
version = "0.1.0"
description = "FastAPI gateway and runtime pipeline for Librarian"
readme = "README.md"
authors = [
    { name = "DotNaos", email = "schuetzoliver00@gmail.com" }
]
requires-python = ">=3.10"
dependencies = [
    "fastapi",
    "dotenv",
    "uvicorn[standard]",
    "importlib_metadata; python_version<'3.10'",
    "librarian-core",
    "librarian-extractor",
    "librarian-scraper",
    "prefect>=3.4.1",
]

# [tool.uv.sources]
# librarian-core = { git = "https://github.com/DotNaos/librarian-core", rev = "main" }
# librarian-extractor = { git = "https://github.com/DotNaos/librarian-extractor", rev = "main" }
# librarian-scraper = { git = "https://github.com/DotNaos/librarian-scraper", rev = "main" }

[build-system]
requires = ["hatchling>=1.21"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src/atlas_librarian", "src/atlas_librarian/app"]

[tool.hatch.metadata]
allow-direct-references = true

[project.scripts]
deploy = "atlas_librarian.app.cli:deploy"

# ───────── optional: dev / test extras ─────────
[project.optional-dependencies]
dev = ["typer", "ruff", "pytest", "mypy"]
20 librarian/atlas-librarian/src/atlas_librarian/__init__.py Normal file
@@ -0,0 +1,20 @@
import importlib
import pkgutil

__all__: list[str] = []

# Iterate over all modules in this package
for finder, module_name, is_pkg in pkgutil.iter_modules(__path__):
    # import the sub-module
    module = importlib.import_module(f"{__name__}.{module_name}")

    # decide which names to re-export:
    # use module.__all__ if it exists, otherwise every non-private attribute
    public_names = getattr(
        module, "__all__", [n for n in dir(module) if not n.startswith("_")]
    )

    # bring each name into the package namespace
    for name in public_names:
        globals()[name] = getattr(module, name)
        __all__.append(name)
23 librarian/atlas-librarian/src/atlas_librarian/api/__init__.py Normal file
@@ -0,0 +1,23 @@

# ------------------------------------------------------ #
# Workers have to be imported here to be discovered by the worker loader
# ------------------------------------------------------ #
from librarian_chunker.chunker import Chunker
from librarian_extractor.ai_sanitizer import AISanitizer
from librarian_extractor.extractor import Extractor
from librarian_scraper.crawler import Crawler
from librarian_scraper.downloader import Downloader

from atlas_librarian.api.recipes import router as recipes_router
from atlas_librarian.api.runs import router as runs_router
from atlas_librarian.api.worker import router as worker_router

__all__ = [
    "worker_router",
    "runs_router",
    "recipes_router",
    "Crawler",
    "Downloader",
    "Extractor",
    "AISanitizer",
    "Chunker",
]
123 librarian/atlas-librarian/src/atlas_librarian/api/recipes.py Normal file
@@ -0,0 +1,123 @@
"""
Recipe API
==========

POST /recipes/run         → start a pipeline (sequence of workers)
GET  /recipes/{id}        → metadata (simple JSON)
WS   /recipes/{id}/events → live step-wise updates

The recipe engine constructs a *single* Prefect flow in-memory that
executes workers one after another, automatically forwarding each
FlowArtifact to the next worker.
"""
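
# Example request body for POST /recipes/run (a sketch — the worker names must
# match keys registered in WORKERS, e.g. the classes re-exported from
# atlas_librarian.api, and the payload must validate against the first
# worker's input_model):
#
#   {
#     "workers": ["Crawler", "Downloader"],
#     "payload": {"id": "1157", "name": "Computational and Data Science"}
#   }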

from __future__ import annotations

import datetime as dt
import uuid
from pathlib import Path
from typing import List

import anyio
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from librarian_core.workers.base import FlowArtifact, Worker
from prefect import get_client
from pydantic import BaseModel, Field

from atlas_librarian.stores.workers import WORKERS

router = APIRouter(tags=["recipes"])


# --------------------------------------------------------------------------- #
#  Pydantic models                                                             #
# --------------------------------------------------------------------------- #
class RecipeRequest(BaseModel):
    workers: List[str] = Field(min_length=1)
    payload: dict  # input of the first worker


class RecipeMetadata(BaseModel):
    id: str
    created: dt.datetime
    worker_chain: List[str]
    flow_run_id: str | None = None


# in-memory “DB”
# NOTE: run_recipe below executes synchronously and does not register metadata
# here yet, so the two routes that read this dict only serve entries added
# elsewhere.
_RECIPES: dict[str, RecipeMetadata] = {}


# --------------------------------------------------------------------------- #
#  routes                                                                      #
# --------------------------------------------------------------------------- #
@router.post("/run", status_code=202, response_model=list[FlowArtifact])
def run_recipe(req: RecipeRequest) -> list[FlowArtifact]:
    # validate the worker chain before running anything
    for w in req.workers:
        if w not in WORKERS:
            raise HTTPException(400, f"Unknown worker: {w}")

    artifacts: list[FlowArtifact] = []

    start_worker: type[Worker] = WORKERS[req.workers[0]]
    payload = start_worker.input_model.model_validate(req.payload)

    async def _run_worker(worker: type[Worker], art: FlowArtifact) -> FlowArtifact:
        return await worker.flow()(art)

    # Kick off the first worker
    art: FlowArtifact = anyio.run(
        _run_worker,
        start_worker,
        FlowArtifact(data=payload, run_id=str(uuid.uuid4()), dir=Path(".")),
    )
    artifacts.append(art)

    # Forward each FlowArtifact to the next worker in the chain
    for wn in req.workers[1:]:
        worker: type[Worker] = WORKERS[wn]
        art = anyio.run(_run_worker, worker, art)
        artifacts.append(art)

    if not artifacts or len(artifacts) != len(req.workers):
        raise HTTPException(500, "Failed to run recipe")

    return artifacts


@router.get("/{recipe_id}", response_model=RecipeMetadata)
def get_recipe(recipe_id: str):
    meta = _RECIPES.get(recipe_id)
    if not meta:
        raise HTTPException(404, "Recipe not found")
    return meta


@router.websocket("/{recipe_id}/events")
async def ws_recipe_events(ws: WebSocket, recipe_id: str):
    """
    Streams *flow-level* events for the recipe: each worker completion.
    """
    meta = _RECIPES.get(recipe_id)
    if not meta or not meta.flow_run_id:
        raise HTTPException(404, "Recipe not running or unknown")

    await ws.accept()
    try:
        async with get_client() as client:
            async for state in client.stream_flow_run_states(meta.flow_run_id):  # type: ignore
                await ws.send_text(state.json(include={"type", "name"}))
    except WebSocketDisconnect:
        pass
    finally:
        await ws.close()
85 librarian/atlas-librarian/src/atlas_librarian/api/runs.py Normal file
@@ -0,0 +1,85 @@
"""
Light-weight *runs* endpoints to query artefacts produced by workers.

• `GET /runs/{run_id}`          → metadata & locations for a finished / running flow
• `GET /runs/{worker}/latest`   → most recent run of a given worker
• `GET /runs/{run_id}/artifact` → rendered artifact.md for a run, if present
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

from fastapi import APIRouter, HTTPException
from librarian_core.storage.worker_store import WorkerStore
from pydantic import BaseModel

router = APIRouter(tags=["runs"])


# --------------------------------------------------------------------------- #
#  response model                                                              #
# --------------------------------------------------------------------------- #
class RunInfo(BaseModel):
    run_id: str
    worker: str
    state: str
    dir: Path
    data: dict | None = None


# --------------------------------------------------------------------------- #
#  helper                                                                      #
# --------------------------------------------------------------------------- #
def _open_store(run_id: str) -> WorkerStore:
    try:
        return WorkerStore.open(run_id)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail="Run-id not found") from exc


# --------------------------------------------------------------------------- #
#  routes                                                                      #
# --------------------------------------------------------------------------- #
@router.get("/{run_id}", response_model=RunInfo)
def get_run(run_id: str) -> RunInfo:
    """
    Return coarse-grained information about a single flow run.

    For the web-UI we expose only minimal metadata plus the local directory
    where files were written; clients can read further details from disk.
    """
    store = _open_store(run_id)
    meta = store.metadata  # {'worker_name': …, 'state': …, …}

    return RunInfo(
        run_id=run_id,
        worker=meta["worker_name"],
        state=meta["state"],
        dir=store.data_dir,
        data=store.load_model(as_dict=True),  # type: ignore
    )


@router.get("/{worker_name}/latest", response_model=RunInfo | None)
def get_latest_run(worker_name: str) -> RunInfo | None:
    artifact: dict[str, Any] | None = WorkerStore.load_latest(worker_name=worker_name)
    if artifact is None:
        raise HTTPException(status_code=404, detail="No runs found")

    store = _open_store(artifact["run_id"])
    meta = store.metadata
    return RunInfo(
        run_id=artifact["run_id"],
        worker=worker_name,
        state=meta["state"],
        dir=artifact["dir"],
        data=artifact["data"],
    )


@router.get("/{run_id}/artifact")
def get_artifact(run_id: str) -> str:
    store = _open_store(run_id)
    # Serve the rendered artifact.md if the run produced one
    # (note: _run_dir is a private attribute of WorkerStore)
    artifact_path = store._run_dir / "artifact.md"
    if not artifact_path.exists():
        raise HTTPException(status_code=404, detail="Artifact not found")
    return artifact_path.read_text()
204 librarian/atlas-librarian/src/atlas_librarian/api/worker.py Normal file
@@ -0,0 +1,204 @@
"""
REST endpoints exposing individual workers + *live-status* stream.

GET  /workers                                   → ["Crawler", "Downloader", …]
GET  /workers/orders                            → list pending orders
POST /workers/{name}/submit                     → start worker; body = {"dir": <str>?, "run_id": <str>?, "data": …}
POST /workers/{name}/submit/{run_id}/chain      → chain onto a previous run's output
POST /workers/{name}/order                      → place an order; returns a receipt
WS   /workers/{run_id}/events                   → live Prefect state changes for that run
"""
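
# Example body for POST /workers/Crawler/submit (a sketch — worker name and
# payload values are illustrative; "data" is validated against the worker's
# input_model by _get_artifact_from_payload below, while "dir" and "run_id"
# are optional and may be omitted):
#
#   {"data": {"id": "1157", "name": "Computational and Data Science"}}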

from __future__ import annotations

import uuid
from typing import Any

from fastapi import APIRouter, HTTPException
from librarian_core.storage.worker_store import WorkerStore
from librarian_core.workers.base import FlowArtifact, Worker
from pydantic import BaseModel

from atlas_librarian.stores.workers import WORKERS

router = APIRouter(tags=["workers"])


# --------------------------------------------------------------------------- #
#  response schema                                                             #
# --------------------------------------------------------------------------- #

class Order(BaseModel):
    order_id: str
    worker_name: str
    payload: dict

    # The order is accepted and moved into the order pool
    def accept(self):
        _ORDER_POOL[self.order_id] = self
        return self.order_id

    # The order is completed and removed from the order pool
    def complete(self):
        del _ORDER_POOL[self.order_id]


_ORDER_POOL: dict[str, Order] = {}


# --------------------------------------------------------------------------- #
#  helpers                                                                     #
# --------------------------------------------------------------------------- #
def _get_worker_or_404(name: str):
    cls = WORKERS.get(name)
    if cls is None:
        raise HTTPException(status_code=404, detail="Unknown worker")
    return cls


def _get_artifact_from_payload(
    payload: dict[str, Any], cls: type[Worker]
) -> FlowArtifact[Any]:
    dir = payload.get("dir") or None
    run_id = payload.get("run_id") or None
    # Validate the payload against the worker's input model
    input_data = cls.input_model.model_validate(payload["data"])

    return FlowArtifact.new(data=input_data, dir=dir, run_id=run_id)


# --------------------------------------------------------------------------- #
#  GET Routes                                                                  #
# --------------------------------------------------------------------------- #

class WorkerMeta(BaseModel):
    name: str
    input: str
    output: str


@router.get("", response_model=list[WorkerMeta])
def list_workers() -> list[WorkerMeta]:
    return [
        WorkerMeta(
            name=worker.worker_name,
            input=worker.input_model.__name__,
            output=worker.output_model.__name__,
        )
        for worker in WORKERS.values()
    ]


@router.get("/orders", response_model=list[Order])
def list_orders() -> list[Order]:
    return list(_ORDER_POOL.values())


# --------------------------------------------------------------------------- #
#  POST Routes                                                                 #
# --------------------------------------------------------------------------- #

# ---------- Submit and get the result ----------------------------------------
@router.post("/{worker_name}/submit", response_model=FlowArtifact, status_code=202)
def submit_worker(worker_name: str, payload: dict[str, Any]) -> FlowArtifact:
    cls = _get_worker_or_404(worker_name)

    try:
        art = _get_artifact_from_payload(payload, cls)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

    try:
        return cls.submit(art)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


# ---------- Submit on an existing run -----------------------------------------
@router.post(
    "/{worker_name}/submit/{prev_run_id}/chain",
    response_model=FlowArtifact,
    status_code=202,
)
def submit_chain(worker_name: str, prev_run_id: str) -> FlowArtifact:
    cls = _get_worker_or_404(worker_name)

    try:
        store = WorkerStore.open(prev_run_id)
        run_id = prev_run_id
        dir = store.data_dir
        data = cls.input_model.model_validate(store.load_model())

        art = FlowArtifact.new(run_id=run_id, dir=dir, data=data)

        if art is None:
            raise HTTPException(status_code=400, detail="No artifact found")

    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
    try:
        return cls.submit(art)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


# ---------- Submit and chain, using the latest output of a worker -------------
@router.post(
    "/{worker_name}/submit/{prev_worker_name}/chain/latest",
    response_model=FlowArtifact | None,
    status_code=202,
)
def submit_chain_latest(worker_name: str, prev_worker_name: str) -> FlowArtifact | None:
    cls = _get_worker_or_404(worker_name)
    prev_cls = _get_worker_or_404(prev_worker_name)

    artifact: dict[str, Any] | None = WorkerStore.load_latest(
        worker_name=prev_worker_name
    )
    if artifact is None:
        raise HTTPException(status_code=404, detail="No runs found")

    run_id = artifact["run_id"]
    dir = artifact["dir"]
    data = prev_cls.output_model.model_validate(artifact["data"])

    art = FlowArtifact.new(run_id=run_id, dir=dir, data=data)

    return cls.submit(art)


# ---------- Place an Order and get a receipt ----------------------------------
@router.post("/{worker_name}/order", response_model=Order, status_code=202)
def place_order(worker_name: str, payload: dict[str, Any]) -> Order:
    cls = _get_worker_or_404(worker_name)

    try:
        art = _get_artifact_from_payload(payload, cls)
        order_id = str(uuid.uuid4())
        order = Order(order_id=order_id, worker_name=worker_name, payload=art.model_dump())
        order.accept()
        return order
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@router.post(
    "/{worker_name}/order/{prev_run_id}/chain", response_model=Order, status_code=202
)
def chain_order(worker_name: str, prev_run_id: str) -> Order:
    cls = _get_worker_or_404(worker_name)

    try:
        store = WorkerStore.open(prev_run_id)
        run_id = prev_run_id
        dir = store.data_dir
        data = cls.input_model.model_validate(store.load_model())

        art = FlowArtifact.new(run_id=run_id, dir=dir, data=data)

        if art is None:
            raise HTTPException(status_code=400, detail="No artifact found")

    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
    try:
        order = Order(order_id=str(uuid.uuid4()), worker_name=worker_name, payload=art.model_dump())
        order.accept()
        return order
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
7 librarian/atlas-librarian/src/atlas_librarian/app/__init__.py Normal file
@@ -0,0 +1,7 @@

from .app import app
from .cli import deploy

__all__ = [
    "app",
    "deploy",
]
68 librarian/atlas-librarian/src/atlas_librarian/app/app.py Normal file
@@ -0,0 +1,68 @@
# atlas_librarian/app/app.py
import logging

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from librarian_core.utils.secrets_loader import load_env

from atlas_librarian.api import recipes_router, runs_router, worker_router
from atlas_librarian.stores import discover_workers

# Application description for OpenAPI docs
APP_DESCRIPTION = """
Atlas Librarian API Gateway 🚀

Provides endpoints to start and track long-running operations (crawl,
download, etc.), which run as background tasks. Initially uses mock data;
intended to be connected to backend workers.
"""

API_PREFIX = "/api"


def create_app() -> FastAPI:
    load_env()

    discover_workers()
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(message)s",
    )

    app = FastAPI(
        title="Atlas Librarian API",
        version="0.1.0",  # semantic versioning
        description=APP_DESCRIPTION,
    )

    # Configure CORS
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Include all API routers
    app.include_router(worker_router, prefix=f"{API_PREFIX}/worker")
    app.include_router(runs_router, prefix=f"{API_PREFIX}/runs")
    app.include_router(recipes_router, prefix=f"{API_PREFIX}/recipes")

    return app


# Create the app instance
app = create_app()


@app.get("/", tags=["Root"], summary="API Root/Health Check")
async def read_root():
    """
    Provides a basic health check endpoint.
    Returns a welcome message indicating the API is running.
    """
    return {"message": "Welcome to Atlas Librarian API"}


# No direct run block; start with: uvicorn atlas_librarian.app:app --reload --port 8000
3 librarian/atlas-librarian/src/atlas_librarian/app/cli.py Normal file
@@ -0,0 +1,3 @@
# TODO: Implement deployment logic
def deploy():
    pass
@@ -0,0 +1,92 @@

example_moodle_index = {
    "degree_program": {
        "id": "1157",
        "name": "Computational and Data Science",
        "terms": [
            {
                "id": "1745",
                "name": "FS25",
                "courses": [
                    {
                        "id": "18240",
                        "name": "Effiziente Algorithmen",
                        "activity_type": "",
                        "hero_image": "",
                        "content_ressource_id": "1125554",
                        "files": [],
                    },
                    {
                        "id": "18237",
                        "name": "Mathematik II",
                        "activity_type": "",
                        "hero_image": "https://moodle.fhgr.ch/pluginfile.php/1125458/course/overviewfiles/Integration_Differential_b.png",
                        "content_ressource_id": "1125458",
                        "files": [],
                    },
                ],
            },
        ],
    },
}

example_study_program = {
    "id": "1157",
    "name": "Computational and Data Science",
}

example_downloaded_courses = {
    "terms": [
        {
            "id": "1745",
            "name": "FS25",
            "course_zips": [
                {"id": "18863", "name": "Programmierung und Prompt Engineering II"},
                {"id": "18240", "name": "Effiziente Algorithmen"},
                {"id": "18237", "name": "Mathematik II"},
                {"id": "18236", "name": "2025 FS FHGR CDS Numerische Methoden"},
                {"id": "18228", "name": "Datenbanken und Datenverarbeitung"},
            ],
        },
        {
            "id": "1746",
            "name": "HS24",
            "course_zips": [
                {"id": "18030", "name": "Bootcamp Wissenschaftliches Arbeiten"},
                {"id": "17527", "name": "Einführung in Data Science"},
                {"id": "17526", "name": "Einführung in Computational Science"},
                {"id": "17525", "name": "Mathematik I"},
                {"id": "17507", "name": "Programmierung und Prompt Engineering"},
                {"id": "17505", "name": "Algorithmen und Datenstrukturen"},
                {"id": "17503", "name": "Computer Science"},
            ],
        },
    ],
}

recipes = [
    {
        "name": "development",
        "steps": [
            # "crawl",
            # "download",
            "extract",
        ],
        # Default parameters
        "parameters": {
            "crawl": example_study_program,
            "download": example_moodle_index,
            "extract": example_downloaded_courses,
        },
    },
    # All steps in one go
    {
        "name": "quick-all",
        "steps": ["crawl", "download", "extract"],
        "parameters": {
            "crawl": {
                "id": "1157",
                "name": "Computational and Data Science",
            },
        },
    },
]
6 librarian/atlas-librarian/src/atlas_librarian/stores/__init__.py Normal file
@@ -0,0 +1,6 @@

from .workers import (
    WORKERS,
    discover_workers,
)

__all__ = ["discover_workers", "WORKERS"]
82 librarian/atlas-librarian/src/atlas_librarian/stores/workers.py Normal file
@@ -0,0 +1,82 @@

# atlas_librarian/stores/workers.py
"""
Auto-discovers every third-party Worker package that exposes an entry-point

    # e.g. the scraper package:
    [project.entry-points."librarian.worker"]
    downloader = "librarian_downloader.downloader"
    extractor  = "librarian_extractor.extractor"

and loads the Worker classes so they are available to the UI / API layer.

Nothing in this module imports *atlas_librarian* from the core side – it only
talks *inwards* (importing the plugin code), which avoids circular dependencies.
"""

from __future__ import annotations

import inspect
from types import ModuleType
from typing import Dict, Type

try:  # stdlib importlib.metadata
    from importlib.metadata import EntryPoint, entry_points
except ImportError:  # older Pythons → fall back to the back-port
    from importlib_metadata import EntryPoint, entry_points  # type: ignore

from librarian_core.workers.base import Worker

# --------------------------------------------------------------------------------------

WORKERS: Dict[str, Type[Worker]] = {}  # key = Worker.worker_name


# --------------------------------------------------------------------------------------
def _register_worker_class(obj: object) -> None:
    """
    Inspect *obj* and register it if it looks like a Worker subclass
    produced by the metaclass in *librarian_core*.
    """
    if (
        inspect.isclass(obj)
        and issubclass(obj, Worker)
        and obj is not Worker  # not the abstract base
    ):
        WORKERS[obj.worker_name] = obj  # type: ignore[arg-type]


# --------------------------------------------------------------------------------------
def _import_ep(ep: EntryPoint):
    """Load the object referenced by an entry-point."""
    return ep.load()


# --------------------------------------------------------------------------------------
def discover_workers(group: str = "librarian.worker") -> None:
    """
    Discover all entry-points of *group* and populate ``WORKERS``.
    Safe to call multiple times – duplicates are ignored.
    """
    # support both modern and legacy entry_points APIs
    try:
        eps = entry_points(group=group)
    except TypeError:
        eps = entry_points().select(group=group)  # type: ignore[attr-defined]

    for ep in eps:
        try:
            loaded = _import_ep(ep)
        except Exception as exc:  # pragma: no cover
            print(f"[Worker-Loader] Failed to load entry-point {ep!r}: {exc}")
            continue

        # If a module was loaded, inspect its attributes; else try registering directly
        if isinstance(loaded, ModuleType):
            for attr in loaded.__dict__.values():
                _register_worker_class(attr)
        else:
            _register_worker_class(loaded)

    # Register any Worker subclasses imported directly (e.g., via atlas_librarian/api/__init__)
    for cls in Worker.__subclasses__():
        _register_worker_class(cls)
1663 librarian/atlas-librarian/uv.lock generated Normal file
File diff suppressed because it is too large