Add API to repo
parent f80792d739
commit 52d087ce90

41 librarian/atlas-librarian/.prefectignore Normal file
@@ -0,0 +1,41 @@
# prefect artifacts
.prefectignore

# python artifacts
__pycache__/
*.py[cod]
*$py.class
*.egg-info/
*.egg

# Type checking artifacts
.mypy_cache/
.dmypy.json
dmypy.json
.pyre/

# IPython
profile_default/
ipython_config.py
*.ipynb_checkpoints/*

# Environments
.python-version
.env
.venv
env/
venv/

# MacOS
.DS_Store

# Dask
dask-worker-space/

# Editors
.idea/
.vscode/

# VCS
.git/
.hg/
14 librarian/atlas-librarian/README.md Normal file
@@ -0,0 +1,14 @@
# Usage

## Serving the API

Run the following command to start the server:

```bash
uv run uvicorn atlas_librarian.app:app --reload --port 8000
```
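
Once the server is up, a quick sanity check is to hit the root health-check endpoint (assuming the default port above; the expected response is the welcome JSON defined in `app.py`):

```bash
curl http://localhost:8000/
# → {"message":"Welcome to Atlas Librarian API"}
```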

## Deploy to Prefect

Run the following command to deploy the flow to Prefect:

```bash
uv run deploy
```
117 librarian/atlas-librarian/docs/api_specification.md Normal file
@@ -0,0 +1,117 @@
# Atlas Librarian API Specification (v0.1.0)

This document outlines the endpoints provided by the Atlas Librarian API Gateway.

**Base URL:** `/v1`

## Authentication

Currently, no authentication is required.

## Tasks

Long-running operations like crawling and downloading are handled via background tasks. You initiate a task via a POST request and then poll the status endpoint using the returned `task_id`.

### Task Object (`TaskInfo`)

```json
{
  "task_id": "string (uuid)",
  "name": "string | null (e.g., 'crawl', 'download')",
  "state": "string (queued | running | success | error)",
  "progress": "number | null (0.0 to 1.0, only present while running)",
  "detail": "string | null (details about state, especially errors)",
  "download_links": "array[string] | null (only for successful download tasks)"
}
```
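
A minimal polling sketch in Python (assumes the gateway is reachable at `http://localhost:8000` and that the `requests` package is installed; the paths follow this spec):

```python
import time

import requests

BASE = "http://localhost:8000/v1"

# Start a crawl task; the server answers 202 with a TaskInfo object.
task = requests.post(f"{BASE}/crawl", json={"no_cache": False}).json()

# Poll the status endpoint until the task leaves the queued/running states.
while task["state"] in ("queued", "running"):
    time.sleep(2)
    task = requests.get(f"{BASE}/crawl/status/{task['task_id']}").json()

print(task["state"], task.get("detail"))
```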

---

## Endpoints

### Crawler

**Tags:** `Crawler`

- **`POST /v1/crawl`**
  - **Summary:** Start Course Crawl
  - **Description:** Initiates a background task to crawl Moodle course information. `no_cache=true` forces a fresh crawl.
  - **Request Body:** `CrawlRequest`

    ```json
    {
      "no_cache": "boolean (optional, default: false)"
    }
    ```

  - **Response (202 Accepted):** `TaskInfo` (initial state: queued)
- **`GET /v1/crawl/status/{task_id}`**
  - **Summary:** Get Crawl Task Status
  - **Description:** Retrieves the current status (queued, running, success, error) and progress of a specific crawl task.
  - **Path Parameter:** `task_id` (string)
  - **Response (200 OK):** `TaskInfo`
  - **Response (404 Not Found):** If `task_id` does not exist.

### Downloader

**Tags:** `Downloader`

- **`POST /v1/download`**
  - **Summary:** Start Course Material Download
  - **Description:** Initiates a background task to download materials for the specified list of courses. An example request follows this list.
  - **Request Body:** `DownloadRequest`

    ```json
    {
      "courses": [
        {
          "module_id": "string",
          "term_id": "string"
        }
      ]
    }
    ```

  - **Response (202 Accepted):** `TaskInfo` (initial state: queued)
  - **Response (400 Bad Request):** If `courses` list is empty.
- **`GET /v1/download/status/{task_id}`**
  - **Summary:** Get Download Task Status
  - **Description:** Retrieves the current status (queued, running, success, error) and progress of a specific download task. Includes `download_links` on success.
  - **Path Parameter:** `task_id` (string)
  - **Response (200 OK):** `TaskInfo`
  - **Response (404 Not Found):** If `task_id` does not exist.
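
For example, starting a download for a single course (IDs are illustrative, taken from the sample data elsewhere in this commit):

```bash
curl -X POST http://localhost:8000/v1/download \
  -H "Content-Type: application/json" \
  -d '{"courses": [{"module_id": "18240", "term_id": "1745"}]}'
```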

### Metadata

**Tags:** `Metadata`

- **`GET /v1/terms`**
  - **Summary:** Get All Terms
  - **Description:** Retrieves a list of all available academic terms (e.g., 'HS24', 'FS25').
  - **Response (200 OK):** `List[string]`

    ```json
    ["HS24", "FS25"]
    ```

### Summaries

**Tags:** `Summaries`

- **`GET /v1/summaries/{term_id}/{course_id}`**
  - **Summary:** Get Course Summary
  - **Description:** Retrieves the generated summary content (in Markdown format) for a specific course within a specific term.
  - **Path Parameters:** `term_id` (string), `course_id` (string)
  - **Response (200 OK):** `SummaryResponse`

    ```json
    {
      "summary": "string (Markdown)"
    }
    ```

  - **Response (404 Not Found):** If the summary for the given course/term does not exist.
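
For example (illustrative term and course IDs from the sample data in this commit):

```bash
curl http://localhost:8000/v1/summaries/HS24/17525
```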

---

_Generated based on FastAPI application structure._
44 librarian/atlas-librarian/pyproject.toml Normal file
@@ -0,0 +1,44 @@
[project]
name = "atlas-librarian"
version = "0.1.0"
description = "FastAPI gateway and runtime pipeline for Librarian"
readme = "README.md"
authors = [
    { name = "DotNaos", email = "schuetzoliver00@gmail.com" }
]
requires-python = ">=3.10"
dependencies = [
    "fastapi",
    "dotenv",
    "uvicorn[standard]",
    "importlib_metadata; python_version<'3.10'",
    "librarian-core",
    "librarian-extractor",
    "librarian-scraper",
    "prefect>=3.4.1",
]

# [tool.uv.sources]
# librarian-core = { git = "https://github.com/DotNaos/librarian-core", rev = "main" }
# librarian-extractor = { git = "https://github.com/DotNaos/librarian-extractor", rev = "main" }
# librarian-scraper = { git = "https://github.com/DotNaos/librarian-scraper", rev = "main" }

[build-system]
requires = ["hatchling>=1.21"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src/atlas_librarian", "src/atlas_librarian/app"]

[tool.hatch.metadata]
allow-direct-references = true

[project.scripts]
deploy = "atlas_librarian.app.cli:deploy"

# ───────── optional: dev / test extras ─────────
[project.optional-dependencies]
dev = ["typer", "ruff", "pytest", "mypy"]
20 librarian/atlas-librarian/src/atlas_librarian/__init__.py Normal file
@@ -0,0 +1,20 @@
import importlib
import pkgutil

__all__: list[str] = []

# Iterate over all modules in this package
for finder, module_name, is_pkg in pkgutil.iter_modules(__path__):
    # import the sub-module
    module = importlib.import_module(f"{__name__}.{module_name}")

    # decide which names to re-export:
    # use module.__all__ if it exists, otherwise every non-private attribute
    public_names = getattr(
        module, "__all__", [n for n in dir(module) if not n.startswith("_")]
    )

    # bring each name into the package namespace
    for name in public_names:
        globals()[name] = getattr(module, name)
        __all__.append(name)
23 librarian/atlas-librarian/src/atlas_librarian/api/__init__.py Normal file
@@ -0,0 +1,23 @@

# ------------------------------------------------------ #
# Workers have to be imported here to be discovered by the worker loader
# ------------------------------------------------------ #
from librarian_chunker.chunker import Chunker
from librarian_extractor.ai_sanitizer import AISanitizer
from librarian_extractor.extractor import Extractor
from librarian_scraper.crawler import Crawler
from librarian_scraper.downloader import Downloader

from atlas_librarian.api.recipes import router as recipes_router
from atlas_librarian.api.runs import router as runs_router
from atlas_librarian.api.worker import router as worker_router

__all__ = [
    "worker_router",
    "runs_router",
    "recipes_router",
    "Crawler",
    "Downloader",
    "Extractor",
    "AISanitizer",
    "Chunker",
]
123 librarian/atlas-librarian/src/atlas_librarian/api/recipes.py Normal file
@@ -0,0 +1,123 @@
"""
Recipe API
==========

POST /recipes/run         → start a pipeline (sequence of workers)
GET  /recipes/{id}        → metadata (simple JSON)
WS   /recipes/{id}/events → live step-wise updates

The recipe engine constructs a *single* Prefect flow in-memory that
executes workers one after another, automatically forwarding each
FlowArtifact to the next worker.
"""
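
# Example request body for POST /recipes/run (a sketch — the worker names must
# match keys registered in WORKERS, e.g. the classes re-exported from
# atlas_librarian.api, and the payload must validate against the first
# worker's input_model):
#
#   {
#     "workers": ["Crawler", "Downloader"],
#     "payload": {"id": "1157", "name": "Computational and Data Science"}
#   }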

from __future__ import annotations

import datetime as dt
import uuid
from pathlib import Path
from typing import List

import anyio
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from librarian_core.workers.base import FlowArtifact, Worker
from prefect import get_client
from pydantic import BaseModel, Field

from atlas_librarian.stores.workers import WORKERS

router = APIRouter(tags=["recipes"])


# --------------------------------------------------------------------------- #
#  Pydantic models                                                             #
# --------------------------------------------------------------------------- #
class RecipeRequest(BaseModel):
    workers: List[str] = Field(min_length=1)
    payload: dict  # input of the first worker


class RecipeMetadata(BaseModel):
    id: str
    created: dt.datetime
    worker_chain: List[str]
    flow_run_id: str | None = None


# in-memory “DB”
# NOTE: run_recipe below executes synchronously and does not register metadata
# here yet, so the two routes that read this dict only serve entries added
# elsewhere.
_RECIPES: dict[str, RecipeMetadata] = {}


# --------------------------------------------------------------------------- #
#  routes                                                                      #
# --------------------------------------------------------------------------- #
@router.post("/run", status_code=202, response_model=list[FlowArtifact])
def run_recipe(req: RecipeRequest) -> list[FlowArtifact]:
    # validate the worker chain before running anything
    for w in req.workers:
        if w not in WORKERS:
            raise HTTPException(400, f"Unknown worker: {w}")

    artifacts: list[FlowArtifact] = []

    start_worker: type[Worker] = WORKERS[req.workers[0]]
    payload = start_worker.input_model.model_validate(req.payload)

    async def _run_worker(worker: type[Worker], art: FlowArtifact) -> FlowArtifact:
        return await worker.flow()(art)

    # Kick off the first worker
    art: FlowArtifact = anyio.run(
        _run_worker,
        start_worker,
        FlowArtifact(data=payload, run_id=str(uuid.uuid4()), dir=Path(".")),
    )
    artifacts.append(art)

    # Forward each FlowArtifact to the next worker in the chain
    for wn in req.workers[1:]:
        worker: type[Worker] = WORKERS[wn]
        art = anyio.run(_run_worker, worker, art)
        artifacts.append(art)

    if not artifacts or len(artifacts) != len(req.workers):
        raise HTTPException(500, "Failed to run recipe")

    return artifacts


@router.get("/{recipe_id}", response_model=RecipeMetadata)
def get_recipe(recipe_id: str):
    meta = _RECIPES.get(recipe_id)
    if not meta:
        raise HTTPException(404, "Recipe not found")
    return meta


@router.websocket("/{recipe_id}/events")
async def ws_recipe_events(ws: WebSocket, recipe_id: str):
    """
    Streams *flow-level* events for the recipe: each worker completion.
    """
    meta = _RECIPES.get(recipe_id)
    if not meta or not meta.flow_run_id:
        raise HTTPException(404, "Recipe not running or unknown")

    await ws.accept()
    try:
        async with get_client() as client:
            async for state in client.stream_flow_run_states(meta.flow_run_id):  # type: ignore
                await ws.send_text(state.json(include={"type", "name"}))
    except WebSocketDisconnect:
        pass
    finally:
        await ws.close()
85 librarian/atlas-librarian/src/atlas_librarian/api/runs.py Normal file
@@ -0,0 +1,85 @@
"""
Light-weight *runs* endpoints to query artefacts produced by workers.

• `GET /runs/{run_id}`          → metadata & locations for a finished / running flow
• `GET /runs/{worker}/latest`   → most recent run of a given worker
• `GET /runs/{run_id}/artifact` → rendered artifact.md for a run, if present
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

from fastapi import APIRouter, HTTPException
from librarian_core.storage.worker_store import WorkerStore
from pydantic import BaseModel

router = APIRouter(tags=["runs"])


# --------------------------------------------------------------------------- #
#  response model                                                              #
# --------------------------------------------------------------------------- #
class RunInfo(BaseModel):
    run_id: str
    worker: str
    state: str
    dir: Path
    data: dict | None = None


# --------------------------------------------------------------------------- #
#  helper                                                                      #
# --------------------------------------------------------------------------- #
def _open_store(run_id: str) -> WorkerStore:
    try:
        return WorkerStore.open(run_id)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail="Run-id not found") from exc


# --------------------------------------------------------------------------- #
#  routes                                                                      #
# --------------------------------------------------------------------------- #
@router.get("/{run_id}", response_model=RunInfo)
def get_run(run_id: str) -> RunInfo:
    """
    Return coarse-grained information about a single flow run.

    For the web-UI we expose only minimal metadata plus the local directory
    where files were written; clients can read further details from disk.
    """
    store = _open_store(run_id)
    meta = store.metadata  # {'worker_name': …, 'state': …, …}

    return RunInfo(
        run_id=run_id,
        worker=meta["worker_name"],
        state=meta["state"],
        dir=store.data_dir,
        data=store.load_model(as_dict=True),  # type: ignore
    )


@router.get("/{worker_name}/latest", response_model=RunInfo | None)
def get_latest_run(worker_name: str) -> RunInfo | None:
    artifact: dict[str, Any] | None = WorkerStore.load_latest(worker_name=worker_name)
    if artifact is None:
        raise HTTPException(status_code=404, detail="No runs found")

    store = _open_store(artifact["run_id"])
    meta = store.metadata
    return RunInfo(
        run_id=artifact["run_id"],
        worker=worker_name,
        state=meta["state"],
        dir=artifact["dir"],
        data=artifact["data"],
    )


@router.get("/{run_id}/artifact")
def get_artifact(run_id: str) -> str:
    store = _open_store(run_id)
    # Serve the rendered artifact.md if the run produced one
    # (note: _run_dir is a private attribute of WorkerStore)
    artifact_path = store._run_dir / "artifact.md"
    if not artifact_path.exists():
        raise HTTPException(status_code=404, detail="Artifact not found")
    return artifact_path.read_text()
204 librarian/atlas-librarian/src/atlas_librarian/api/worker.py Normal file
@@ -0,0 +1,204 @@
"""
REST endpoints exposing individual workers + *live-status* stream.

GET  /workers                                   → ["Crawler", "Downloader", …]
GET  /workers/orders                            → list pending orders
POST /workers/{name}/submit                     → start worker; body = {"dir": <str>?, "run_id": <str>?, "data": …}
POST /workers/{name}/submit/{run_id}/chain      → chain onto a previous run's output
POST /workers/{name}/order                      → place an order; returns a receipt
WS   /workers/{run_id}/events                   → live Prefect state changes for that run
"""
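
# Example body for POST /workers/Crawler/submit (a sketch — worker name and
# payload values are illustrative; "data" is validated against the worker's
# input_model by _get_artifact_from_payload below, while "dir" and "run_id"
# are optional and may be omitted):
#
#   {"data": {"id": "1157", "name": "Computational and Data Science"}}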

from __future__ import annotations

import uuid
from typing import Any

from fastapi import APIRouter, HTTPException
from librarian_core.storage.worker_store import WorkerStore
from librarian_core.workers.base import FlowArtifact, Worker
from pydantic import BaseModel

from atlas_librarian.stores.workers import WORKERS

router = APIRouter(tags=["workers"])


# --------------------------------------------------------------------------- #
#  response schema                                                             #
# --------------------------------------------------------------------------- #

class Order(BaseModel):
    order_id: str
    worker_name: str
    payload: dict

    # The order is accepted and moved into the order pool
    def accept(self):
        _ORDER_POOL[self.order_id] = self
        return self.order_id

    # The order is completed and removed from the order pool
    def complete(self):
        del _ORDER_POOL[self.order_id]


_ORDER_POOL: dict[str, Order] = {}


# --------------------------------------------------------------------------- #
#  helpers                                                                     #
# --------------------------------------------------------------------------- #
def _get_worker_or_404(name: str):
    cls = WORKERS.get(name)
    if cls is None:
        raise HTTPException(status_code=404, detail="Unknown worker")
    return cls


def _get_artifact_from_payload(
    payload: dict[str, Any], cls: type[Worker]
) -> FlowArtifact[Any]:
    dir = payload.get("dir") or None
    run_id = payload.get("run_id") or None
    # Validate the payload against the worker's input model
    input_data = cls.input_model.model_validate(payload["data"])

    return FlowArtifact.new(data=input_data, dir=dir, run_id=run_id)


# --------------------------------------------------------------------------- #
#  GET Routes                                                                  #
# --------------------------------------------------------------------------- #

class WorkerMeta(BaseModel):
    name: str
    input: str
    output: str


@router.get("", response_model=list[WorkerMeta])
def list_workers() -> list[WorkerMeta]:
    return [
        WorkerMeta(
            name=worker.worker_name,
            input=worker.input_model.__name__,
            output=worker.output_model.__name__,
        )
        for worker in WORKERS.values()
    ]


@router.get("/orders", response_model=list[Order])
def list_orders() -> list[Order]:
    return list(_ORDER_POOL.values())


# --------------------------------------------------------------------------- #
#  POST Routes                                                                 #
# --------------------------------------------------------------------------- #

# ---------- Submit and get the result ----------------------------------------
@router.post("/{worker_name}/submit", response_model=FlowArtifact, status_code=202)
def submit_worker(worker_name: str, payload: dict[str, Any]) -> FlowArtifact:
    cls = _get_worker_or_404(worker_name)

    try:
        art = _get_artifact_from_payload(payload, cls)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

    try:
        return cls.submit(art)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


# ---------- Submit on an existing run -----------------------------------------
@router.post(
    "/{worker_name}/submit/{prev_run_id}/chain",
    response_model=FlowArtifact,
    status_code=202,
)
def submit_chain(worker_name: str, prev_run_id: str) -> FlowArtifact:
    cls = _get_worker_or_404(worker_name)

    try:
        store = WorkerStore.open(prev_run_id)
        run_id = prev_run_id
        dir = store.data_dir
        data = cls.input_model.model_validate(store.load_model())

        art = FlowArtifact.new(run_id=run_id, dir=dir, data=data)

        if art is None:
            raise HTTPException(status_code=400, detail="No artifact found")

    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
    try:
        return cls.submit(art)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


# ---------- Submit and chain, using the latest output of a worker -------------
@router.post(
    "/{worker_name}/submit/{prev_worker_name}/chain/latest",
    response_model=FlowArtifact | None,
    status_code=202,
)
def submit_chain_latest(worker_name: str, prev_worker_name: str) -> FlowArtifact | None:
    cls = _get_worker_or_404(worker_name)
    prev_cls = _get_worker_or_404(prev_worker_name)

    artifact: dict[str, Any] | None = WorkerStore.load_latest(
        worker_name=prev_worker_name
    )
    if artifact is None:
        raise HTTPException(status_code=404, detail="No runs found")

    run_id = artifact["run_id"]
    dir = artifact["dir"]
    data = prev_cls.output_model.model_validate(artifact["data"])

    art = FlowArtifact.new(run_id=run_id, dir=dir, data=data)

    return cls.submit(art)


# ---------- Place an Order and get a receipt ----------------------------------
@router.post("/{worker_name}/order", response_model=Order, status_code=202)
def place_order(worker_name: str, payload: dict[str, Any]) -> Order:
    cls = _get_worker_or_404(worker_name)

    try:
        art = _get_artifact_from_payload(payload, cls)
        order_id = str(uuid.uuid4())
        order = Order(order_id=order_id, worker_name=worker_name, payload=art.model_dump())
        order.accept()
        return order
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@router.post(
    "/{worker_name}/order/{prev_run_id}/chain", response_model=Order, status_code=202
)
def chain_order(worker_name: str, prev_run_id: str) -> Order:
    cls = _get_worker_or_404(worker_name)

    try:
        store = WorkerStore.open(prev_run_id)
        run_id = prev_run_id
        dir = store.data_dir
        data = cls.input_model.model_validate(store.load_model())

        art = FlowArtifact.new(run_id=run_id, dir=dir, data=data)

        if art is None:
            raise HTTPException(status_code=400, detail="No artifact found")

    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
    try:
        order = Order(order_id=str(uuid.uuid4()), worker_name=worker_name, payload=art.model_dump())
        order.accept()
        return order
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
7 librarian/atlas-librarian/src/atlas_librarian/app/__init__.py Normal file
@@ -0,0 +1,7 @@

from .app import app
from .cli import deploy

__all__ = [
    "app",
    "deploy",
]
68 librarian/atlas-librarian/src/atlas_librarian/app/app.py Normal file
@@ -0,0 +1,68 @@
# atlas_librarian/app/app.py
import logging

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from librarian_core.utils.secrets_loader import load_env

from atlas_librarian.api import recipes_router, runs_router, worker_router
from atlas_librarian.stores import discover_workers

# Application description for OpenAPI docs
APP_DESCRIPTION = """
Atlas Librarian API Gateway 🚀

Provides endpoints to start and track long-running operations (crawl,
download, etc.), which run as background tasks. Initially uses mock data;
intended to be connected to backend workers.
"""

API_PREFIX = "/api"


def create_app() -> FastAPI:
    load_env()

    discover_workers()
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(message)s",
    )

    app = FastAPI(
        title="Atlas Librarian API",
        version="0.1.0",  # semantic versioning
        description=APP_DESCRIPTION,
    )

    # Configure CORS
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Include all API routers
    app.include_router(worker_router, prefix=f"{API_PREFIX}/worker")
    app.include_router(runs_router, prefix=f"{API_PREFIX}/runs")
    app.include_router(recipes_router, prefix=f"{API_PREFIX}/recipes")

    return app


# Create the app instance
app = create_app()


@app.get("/", tags=["Root"], summary="API Root/Health Check")
async def read_root():
    """
    Provides a basic health check endpoint.
    Returns a welcome message indicating the API is running.
    """
    return {"message": "Welcome to Atlas Librarian API"}


# No direct run block; start with: uvicorn atlas_librarian.app:app --reload --port 8000
3 librarian/atlas-librarian/src/atlas_librarian/app/cli.py Normal file
@@ -0,0 +1,3 @@
# TODO: Implement deployment logic
def deploy():
    pass
@@ -0,0 +1,92 @@

example_moodle_index = {
    "degree_program": {
        "id": "1157",
        "name": "Computational and Data Science",
        "terms": [
            {
                "id": "1745",
                "name": "FS25",
                "courses": [
                    {
                        "id": "18240",
                        "name": "Effiziente Algorithmen",
                        "activity_type": "",
                        "hero_image": "",
                        "content_ressource_id": "1125554",
                        "files": [],
                    },
                    {
                        "id": "18237",
                        "name": "Mathematik II",
                        "activity_type": "",
                        "hero_image": "https://moodle.fhgr.ch/pluginfile.php/1125458/course/overviewfiles/Integration_Differential_b.png",
                        "content_ressource_id": "1125458",
                        "files": [],
                    },
                ],
            },
        ],
    },
}

example_study_program = {
    "id": "1157",
    "name": "Computational and Data Science",
}

example_downloaded_courses = {
    "terms": [
        {
            "id": "1745",
            "name": "FS25",
            "course_zips": [
                {"id": "18863", "name": "Programmierung und Prompt Engineering II"},
                {"id": "18240", "name": "Effiziente Algorithmen"},
                {"id": "18237", "name": "Mathematik II"},
                {"id": "18236", "name": "2025 FS FHGR CDS Numerische Methoden"},
                {"id": "18228", "name": "Datenbanken und Datenverarbeitung"},
            ],
        },
        {
            "id": "1746",
            "name": "HS24",
            "course_zips": [
                {"id": "18030", "name": "Bootcamp Wissenschaftliches Arbeiten"},
                {"id": "17527", "name": "Einführung in Data Science"},
                {"id": "17526", "name": "Einführung in Computational Science"},
                {"id": "17525", "name": "Mathematik I"},
                {"id": "17507", "name": "Programmierung und Prompt Engineering"},
                {"id": "17505", "name": "Algorithmen und Datenstrukturen"},
                {"id": "17503", "name": "Computer Science"},
            ],
        },
    ],
}

recipes = [
    {
        "name": "development",
        "steps": [
            # "crawl",
            # "download",
            "extract",
        ],
        # Default parameters
        "parameters": {
            "crawl": example_study_program,
            "download": example_moodle_index,
            "extract": example_downloaded_courses,
        },
    },
    # All steps in one go
    {
        "name": "quick-all",
        "steps": ["crawl", "download", "extract"],
        "parameters": {
            "crawl": {
                "id": "1157",
                "name": "Computational and Data Science",
            },
        },
    },
]
6 librarian/atlas-librarian/src/atlas_librarian/stores/__init__.py Normal file
@@ -0,0 +1,6 @@

from .workers import (
    WORKERS,
    discover_workers,
)

__all__ = ["discover_workers", "WORKERS"]
82 librarian/atlas-librarian/src/atlas_librarian/stores/workers.py Normal file
@@ -0,0 +1,82 @@

# atlas_librarian/stores/workers.py
"""
Auto-discovers every third-party Worker package that exposes an entry-point

    # e.g. the scraper package:
    [project.entry-points."librarian.worker"]
    downloader = "librarian_downloader.downloader"
    extractor  = "librarian_extractor.extractor"

and loads the Worker classes so they are available to the UI / API layer.

Nothing in this module imports *atlas_librarian* from the core side – it only
talks *inwards* (importing the plugin code), which avoids circular dependencies.
"""

from __future__ import annotations

import inspect
from types import ModuleType
from typing import Dict, Type

try:  # stdlib importlib.metadata
    from importlib.metadata import EntryPoint, entry_points
except ImportError:  # older Pythons → fall back to the back-port
    from importlib_metadata import EntryPoint, entry_points  # type: ignore

from librarian_core.workers.base import Worker

# --------------------------------------------------------------------------------------

WORKERS: Dict[str, Type[Worker]] = {}  # key = Worker.worker_name


# --------------------------------------------------------------------------------------
def _register_worker_class(obj: object) -> None:
    """
    Inspect *obj* and register it if it looks like a Worker subclass
    produced by the metaclass in *librarian_core*.
    """
    if (
        inspect.isclass(obj)
        and issubclass(obj, Worker)
        and obj is not Worker  # not the abstract base
    ):
        WORKERS[obj.worker_name] = obj  # type: ignore[arg-type]


# --------------------------------------------------------------------------------------
def _import_ep(ep: EntryPoint):
    """Load the object referenced by an entry-point."""
    return ep.load()


# --------------------------------------------------------------------------------------
def discover_workers(group: str = "librarian.worker") -> None:
    """
    Discover all entry-points of *group* and populate ``WORKERS``.
    Safe to call multiple times – duplicates are ignored.
    """
    # support both modern and legacy entry_points APIs
    try:
        eps = entry_points(group=group)
    except TypeError:
        eps = entry_points().select(group=group)  # type: ignore[attr-defined]

    for ep in eps:
        try:
            loaded = _import_ep(ep)
        except Exception as exc:  # pragma: no cover
            print(f"[Worker-Loader] Failed to load entry-point {ep!r}: {exc}")
            continue

        # If a module was loaded, inspect its attributes; else try registering directly
        if isinstance(loaded, ModuleType):
            for attr in loaded.__dict__.values():
                _register_worker_class(attr)
        else:
            _register_worker_class(loaded)

    # Register any Worker subclasses imported directly (e.g., via atlas_librarian/api/__init__)
    for cls in Worker.__subclasses__():
        _register_worker_class(cls)
1663 librarian/atlas-librarian/uv.lock generated Normal file
File diff suppressed because it is too large