Automated commit
This commit is contained in:
parent
eeaf032964
commit
43bcc2122b
373
app/routes.py
373
app/routes.py
|
|
@ -1,131 +1,282 @@
|
|||
"""API routes.
|
||||
|
||||
Define your API endpoints here. All routes are prefixed with /api.
|
||||
|
||||
Built-in AI endpoints (via Druppie SDK):
|
||||
POST /api/ai/chat — LLM chat completion (body: {prompt, system?}) [module-llm]
|
||||
POST /api/ai/ocr — OCR text extraction (body: {image_url}) [module-vision]
|
||||
POST /api/ai/search — Web search (body: {query}) [module-web]
|
||||
|
||||
RAG endpoints (vectors stored in THIS app's own database):
|
||||
POST /api/rag/index — embed + store documents (body: {documents: [...]})
|
||||
POST /api/rag/search — semantic similarity search (body: {query})
|
||||
|
||||
Example adding your own:
|
||||
|
||||
@api.route('/items', methods=['GET'])
|
||||
def list_items():
|
||||
db = next(get_db())
|
||||
items = db.query(Item).all()
|
||||
return jsonify([{'id': str(i.id), 'name': i.name} for i in items])
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
from datetime import date
|
||||
from flask import Blueprint, jsonify, request
|
||||
from druppie_sdk import DruppieClient
|
||||
|
||||
from app.database import get_db
|
||||
from app.rag import RAG
|
||||
from app.models import Permit
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Blueprint on which every route in this file is registered.
api = Blueprint("api", __name__)

# Single Druppie SDK client shared by all handlers below (LLM, OCR, web search
# and the RAG helper all go through it).
druppie = DruppieClient()
|
||||
|
||||
# Index name handed to the RAG helper by the /rag/* endpoints.
RAG_INDEX = "knowledge-base"

# Extensions accepted by _allowed_file(); compared against the lower-cased
# text after the last dot of the uploaded filename.
ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "bmp", "tiff", "webp", "pdf"}

# Upload size limit in bytes (25 * 1024 * 1024); checked against the length of
# the uploaded file's content.
MAX_FILE_SIZE = 25 * 1024 * 1024

# Selectielijst waterschappen 2012 — simplified lookup.
# Maps a normalised permit type (lower case, underscores) to its archive
# nomination ("bewaren" = keep, "vernietigen" = destroy) and the retention
# period in years ("jaren"); entries with nomination "bewaren" carry None.
SELECTIELIJST = {
    "watervergunning_lozing": {"nominatie": "vernietigen", "jaren": 20},
    "watervergunning_onttrekking": {"nominatie": "vernietigen", "jaren": 20},
    "keurvergunning": {"nominatie": "vernietigen", "jaren": 10},
    "omgevingsvergunning": {"nominatie": "bewaren", "jaren": None},
    "lozingsvergunning": {"nominatie": "vernietigen", "jaren": 20},
    "onttrekkingsvergunning": {"nominatie": "vernietigen", "jaren": 15},
    "projectplan": {"nominatie": "bewaren", "jaren": None},
    "peilbesluit": {"nominatie": "bewaren", "jaren": None},
    "leggerwijziging": {"nominatie": "bewaren", "jaren": None},
}

# Dutch-language prompt prefix for the metadata-extraction LLM call; the
# upload handler appends the OCR text directly after the final line.
# NOTE(review): any indentation inside the JSON template was not visible in the
# reviewed source — confirm against the real file before applying.
METADATA_EXTRACTION_PROMPT = """Analyseer het volgende vergunningdocument en extraheer de metadata.
Antwoord ALLEEN met een JSON object met deze velden (laat leeg als niet gevonden):

{
"permit_number": "vergunningnummer (K-xxxx, L-xxxx, etc.)",
"applicant_name": "naam aanvrager",
"permit_holder_name": "naam vergunninghouder",
"issuer_name": "naam uitgever/verlener",
"location": "locatie/adres",
"issue_date": "uitgiftedatum (YYYY-MM-DD)",
"expiry_date": "geldigheidsdatum (YYYY-MM-DD)",
"applicable_law": "toepasselijke wet of regeling",
"work_type": "type werk/activiteit",
"water_type": "type oppervlaktewater",
"embankment_type": "type waterkering",
"permit_type": "type vergunning (watervergunning_lozing, keurvergunning, etc.)",
"source_system": "bronsysteem indien herkenbaar"
}

Document tekst:
"""
|
||||
|
||||
|
||||
def _allowed_file(fn):
    """Return True when the extension of *fn* is in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared
    case-insensitively. A name without any dot is rejected.
    """
    if "." not in fn:
        return False
    extension = fn.rsplit(".", 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
|
||||
def _compute_archive_status(permit_type, issue_date_str):
    """Compute archive nomination, retention, and status per selectielijst.

    Args:
        permit_type: Permit type label. It is trimmed, lower-cased and has
            spaces replaced by underscores before the SELECTIELIJST lookup;
            ``None`` or an unknown type gives nomination ``"onbekend"``.
        issue_date_str: Issue date as an ISO ``YYYY-MM-DD`` string, or a
            falsy value when the date is unknown.

    Returns:
        A ``(nominatie, jaren, status)`` tuple. ``status`` is one of:
        ``"te bewaren (oneindig)"`` for nomination "bewaren";
        ``"te vernietigen"`` once the retention period has passed;
        ``"te bewaren tot <date>"`` while it has not;
        ``"te bewaren (<n> jaar na uitgifte)"`` when the issue date is
        missing or unusable; ``"onbekend"`` otherwise.
    """
    key = (permit_type or "").strip().lower().replace(" ", "_")
    info = SELECTIELIJST.get(key, {"nominatie": "onbekend", "jaren": None})
    nominatie = info["nominatie"]
    jaren = info["jaren"]

    if nominatie == "bewaren":
        return nominatie, jaren, "te bewaren (oneindig)"
    if nominatie != "vernietigen" or not jaren:
        return nominatie, jaren, "onbekend"

    # Used whenever no concrete destroy date can be derived.
    without_date = f"te bewaren ({jaren} jaar na uitgifte)"
    if not issue_date_str:
        return nominatie, jaren, without_date

    try:
        issue = date.fromisoformat(issue_date_str)
    except (TypeError, ValueError):
        return nominatie, jaren, without_date

    try:
        destroy_date = issue.replace(year=issue.year + jaren)
    except (ValueError, OverflowError):
        if (issue.month, issue.day) != (2, 29):
            # Target year is outside the range `date` supports.
            return nominatie, jaren, without_date
        # Issued on 29 February and the target year is not a leap year: use
        # 1 March so the document is never marked for destruction early.
        try:
            destroy_date = date(issue.year + jaren, 3, 1)
        except (ValueError, OverflowError):
            return nominatie, jaren, without_date

    if date.today() >= destroy_date:
        status = "te vernietigen"
    else:
        status = f"te bewaren tot {destroy_date.isoformat()}"
    return nominatie, jaren, status
|
||||
|
||||
|
||||
@api.route("/info")
def info():
    """Return the configured application name as JSON: {"app_name": ...}."""
    # Settings are imported inside the handler rather than at module import.
    from app.config import settings

    payload = {"app_name": settings.app_name}
    return jsonify(payload)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AI endpoints — via Druppie SDK (calls module-llm and module-vision)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@api.route("/ai/chat", methods=["POST"])
def ai_chat_endpoint():
    """LLM chat completion. Body: {"prompt": "...", "system": "..."}

    Returns ``{"answer": ...}`` taken from the "answer" key of the module
    result (empty string when absent), or a 400 JSON error when the body is
    not a JSON object containing "prompt".
    """
    data = request.get_json(silent=True)
    # A JSON array or string is valid JSON but not a usable body; without the
    # type check ["prompt"] would pass the membership test and then fail on
    # data["prompt"] with a 500.
    if not isinstance(data, dict) or "prompt" not in data:
        return jsonify(error="Missing required field: prompt"), 400
    result = druppie.call("llm", "chat", {
        "prompt": data["prompt"],
        "system": data.get("system", "You are a helpful assistant."),
    })
    return jsonify(answer=result.get("answer", ""))
|
||||
|
||||
|
||||
@api.route("/ai/ocr", methods=["POST"])
def ai_ocr_endpoint():
    """OCR text extraction. Body: {"image_url": "https://..."}

    Forwards ``image_url`` to the vision module as ``image_source`` and
    returns ``{"text": ...}`` (empty string when the result has no "text").
    Responds with a 400 JSON error when the body is not a JSON object
    containing "image_url".
    """
    data = request.get_json(silent=True)
    # Reject non-object JSON (arrays, strings) up front so it yields a 400
    # rather than failing on the subscript below.
    if not isinstance(data, dict) or "image_url" not in data:
        return jsonify(error="Missing required field: image_url"), 400
    result = druppie.call("vision", "ocr", {"image_source": data["image_url"]})
    return jsonify(text=result.get("text", ""))
|
||||
|
||||
|
||||
@api.route("/ai/search", methods=["POST"])
def ai_search_endpoint():
    """Web search. Body: {"query": "search terms"}

    Returns the web module's result unchanged as JSON, or a 400 JSON error
    when the body is not a JSON object containing "query".
    """
    data = request.get_json(silent=True)
    # Reject non-object JSON (arrays, strings) up front so it yields a 400
    # rather than failing on the subscript below.
    if not isinstance(data, dict) or "query" not in data:
        return jsonify(error="Missing required field: query"), 400
    result = druppie.call("web", "search_web", {"query": data["query"]})
    return jsonify(result)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RAG endpoints — worked example of the embed → store → search loop
|
||||
#
|
||||
# Vectors live in THIS app's own Postgres (pgvector); embeddings are
|
||||
# generated by the stateless module-llm `embed` tool via the SDK. There is
|
||||
# no shared vectorstore — each app owns its own vectors. The `RAG` helper
|
||||
# (app/rag.py) handles chunking, the embed call, storage, and search.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@api.route("/rag/index", methods=["POST"])
def rag_index_endpoint():
    """Embed and store documents in the app's own database.

    Body: {"documents": [{"content": "...", "source_name": "...",
                          "source_page": 1}, ...]}

    For each document the RAG helper chunks the text, calls
    module-llm `embed` to turn each chunk into a vector, and stores the
    chunk + vector in this app's `vector_chunks` table (pgvector).

    Returns the helper's indexing result as JSON, or a 400 JSON error when
    the body is not a JSON object with a non-empty "documents" value.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not data.get("documents"):
        return jsonify(error="Missing required field: documents"), 400

    db = next(get_db())
    rag = RAG(db, druppie)
    rag.create_index(RAG_INDEX)
    result = rag.index_documents(RAG_INDEX, data["documents"])
    return jsonify(result)


def _parse_metadata_json(raw_answer):
    """Turn the LLM's answer into a metadata dict; return {} when unusable.

    Accepts either bare JSON or JSON wrapped in a Markdown code fence
    (optionally tagged ``json``).
    """
    json_str = raw_answer
    if "```" in json_str:
        # Text between the first pair of fences.
        json_str = json_str.split("```")[1]
        if json_str.startswith("json"):
            json_str = json_str[4:]
    json_str = json_str.strip()

    try:
        meta = json.loads(json_str)
    except json.JSONDecodeError:
        logger.warning("Failed to parse metadata JSON: %s", raw_answer[:200])
        return {}
    if not isinstance(meta, dict):
        # Valid JSON but not an object (e.g. a list): nothing to read fields from.
        logger.warning("Failed to parse metadata JSON: %s", raw_answer[:200])
        return {}
    return meta


@api.route("/permits/upload", methods=["POST"])
def upload_permit():
    """Upload permit document — OCR + metadata extraction + archive classification.

    Expects a multipart form with a ``file`` part and optional
    ``permit_number``, ``applicant_name`` and ``source_system`` fields.

    Flow: validate the upload, store a Permit row with status "processing",
    run OCR, ask the LLM for structured metadata, compute the archive status
    and store the result. A failure during processing is recorded on the row
    (status "error" plus ``error_message``) instead of being raised, so the
    response is the permit as JSON in both cases. Validation failures return
    a 400 JSON error.
    """
    if "file" not in request.files:
        return jsonify(error="Geen bestand geüpload."), 400
    file = request.files["file"]
    if not file.filename or not _allowed_file(file.filename):
        return jsonify(error="Ongeldig bestandstype. Upload JPG, PNG of PDF."), 400
    # Read one byte past the limit at most: enough to detect an oversized
    # upload without pulling an arbitrarily large body into memory.
    file_bytes = file.read(MAX_FILE_SIZE + 1)
    if len(file_bytes) > MAX_FILE_SIZE:
        return jsonify(error="Bestand te groot (max 25 MB)."), 400
    if not file_bytes:
        return jsonify(error="Leeg bestand."), 400

    # Kept separately so a user-supplied value can be told apart from the
    # "upload" default when the LLM metadata is merged in below.
    user_source_system = request.form.get("source_system", "").strip()

    db = next(get_db())
    permit = Permit(
        permit_number=request.form.get("permit_number", "").strip() or None,
        applicant_name=request.form.get("applicant_name", "").strip() or None,
        source_file=file.filename,
        source_system=user_source_system or "upload",
        status="processing",
    )
    db.add(permit)
    db.commit()
    db.refresh(permit)

    ext = file.filename.rsplit(".", 1)[1].lower()
    # Covers every entry of ALLOWED_EXTENSIONS, including gif and bmp.
    mime_map = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "bmp": "image/bmp",
        "tiff": "image/tiff",
        "webp": "image/webp",
        "pdf": "application/pdf",
    }
    mime = mime_map.get(ext, "application/octet-stream")
    data_uri = f"data:{mime};base64,{base64.b64encode(file_bytes).decode()}"

    try:
        # Step 1: OCR — extract text from document
        ocr_result = druppie.call("vision", "ocr", {
            "image_source": data_uri,
            "prompt": "Extraheer alle tekst uit dit vergunningdocument. Bewaar de structuur.",
        })
        # `or ""` also covers an explicit null in the module result.
        extracted_text = ocr_result.get("text") or ""
        permit.extracted_text = extracted_text

        # Step 2: LLM — extract structured metadata from text
        meta_result = druppie.call("llm", "chat", {
            "prompt": METADATA_EXTRACTION_PROMPT + extracted_text[:4000],
            "system": "Je bent een metadata-extractie specialist voor Nederlandse watervergunningen. Antwoord ALLEEN met valid JSON.",
        })
        raw_answer = meta_result.get("answer") or "{}"
        meta = _parse_metadata_json(raw_answer)

        # Apply extracted metadata (LLM fills gaps, user input takes priority)
        permit.permit_number = permit.permit_number or meta.get("permit_number") or None
        permit.applicant_name = permit.applicant_name or meta.get("applicant_name") or None
        permit.permit_holder_name = meta.get("permit_holder_name") or None
        permit.issuer_name = meta.get("issuer_name") or None
        permit.location = meta.get("location") or None
        permit.applicable_law = meta.get("applicable_law") or None
        permit.work_type = meta.get("work_type") or None
        permit.water_type = meta.get("water_type") or None
        permit.embankment_type = meta.get("embankment_type") or None
        permit.permit_type = meta.get("permit_type") or "onbekend"
        permit.source_system = user_source_system or meta.get("source_system") or "upload"

        # Parse dates; values that are not valid ISO dates are left unset.
        for field in ("issue_date", "expiry_date"):
            val = meta.get(field)
            if val:
                try:
                    setattr(permit, field, date.fromisoformat(val))
                except (ValueError, TypeError):
                    pass

        # Step 3: Compute archive status (BR-01 to BR-06)
        nominatie, jaren, arch_status = _compute_archive_status(
            permit.permit_type,
            permit.issue_date.isoformat() if permit.issue_date else None,
        )
        permit.archive_nomination = nominatie
        permit.retention_years = jaren
        permit.archive_status = arch_status
        permit.status = "processed"

    except Exception as e:
        # Deliberate catch-all: any processing failure is recorded on the
        # permit row instead of failing the request.
        logger.exception("Processing failed for permit %s: %s", permit.id, e)
        permit.status = "error"
        permit.error_message = str(e)[:500]

    db.commit()
    db.refresh(permit)
    return jsonify(_permit_to_dict(permit))
|
||||
|
||||
|
||||
@api.route("/rag/search", methods=["POST"])
def rag_search_endpoint():
    """Semantic similarity search over the stored documents.

    Body: {"query": "natural-language question", "top_k": 5}

    The query is embedded with the same module-llm `embed` tool, then
    matched against the stored chunks with pgvector's cosine distance
    (`embedding <=> :qvec`). Returns the top-k chunks with their source
    metadata so the caller can build a cited answer.

    Responds with ``{"results": [...]}``, or a 400 JSON error when the body
    is not a JSON object containing "query". ``top_k`` defaults to 5.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or "query" not in data:
        return jsonify(error="Missing required field: query"), 400

    db = next(get_db())
    rag = RAG(db, druppie)
    results = rag.search(RAG_INDEX, data["query"], top_k=data.get("top_k", 5))
    return jsonify(results=results)


@api.route("/permits")
def list_permits():
    """Return a JSON list of permit summaries, ordered by created_at descending."""
    db = next(get_db())
    permits = db.query(Permit).order_by(Permit.created_at.desc()).all()
    return jsonify([_permit_summary(p) for p in permits])
|
||||
|
||||
|
||||
@api.route("/permits/<int:permit_id>")
def get_permit(permit_id):
    """Return the full JSON representation of one permit, or a 404 JSON error."""
    session = next(get_db())
    by_id = session.query(Permit).filter(Permit.id == permit_id)
    record = by_id.first()
    if record:
        return jsonify(_permit_to_dict(record))
    return jsonify(error="Niet gevonden"), 404
|
||||
|
||||
|
||||
@api.route("/permits/<int:permit_id>", methods=["DELETE"])
def delete_permit(permit_id):
    """Delete one permit by id.

    Responds with ``{"ok": true}`` after the delete is committed, or a 404
    JSON error when no permit has that id.
    """
    session = next(get_db())
    by_id = session.query(Permit).filter(Permit.id == permit_id)
    record = by_id.first()
    if not record:
        return jsonify(error="Niet gevonden"), 404

    session.delete(record)
    session.commit()
    return jsonify(ok=True)
|
||||
|
||||
|
||||
@api.route("/permits/search")
def search_permits():
    """Search across all metadata fields (FR-01).

    Query parameter ``q`` is matched case-insensitively as a substring of
    permit_number, applicant_name, permit_holder_name, location, permit_type,
    applicable_law, work_type, water_type and extracted_text. An empty ``q``
    returns every permit. Results are permit summaries, ordered by
    created_at descending.
    """
    q = request.args.get("q", "").strip()
    db = next(get_db())
    if not q:
        return jsonify([_permit_summary(p) for p in db.query(Permit).order_by(Permit.created_at.desc()).all()])

    # Escape LIKE metacharacters so the user's text is matched literally;
    # otherwise a query such as "100%" or "a_b" acts as a wildcard pattern.
    escaped = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    like = f"%{escaped}%"

    def contains(column):
        return column.ilike(like, escape="\\")

    results = db.query(Permit).filter(
        contains(Permit.permit_number)
        | contains(Permit.applicant_name)
        | contains(Permit.permit_holder_name)
        | contains(Permit.location)
        | contains(Permit.permit_type)
        | contains(Permit.applicable_law)
        | contains(Permit.work_type)
        | contains(Permit.water_type)
        | contains(Permit.extracted_text)
    ).order_by(Permit.created_at.desc()).all()
    return jsonify([_permit_summary(p) for p in results])
|
||||
|
||||
|
||||
@api.route("/permits/stats")
def permit_stats():
    """Dashboard stats.

    Returns JSON with ``total`` (number of permits), ``by_type`` (count per
    lower-cased permit type) and ``by_archive`` (count per archive status).
    A missing type or status is counted under "onbekend".
    """
    db = next(get_db())
    # One query for just the two columns that are counted, instead of loading
    # every full Permit row twice plus a separate COUNT.
    rows = db.query(Permit.permit_type, Permit.archive_status).all()

    by_type = {}
    by_archive = {}
    for permit_type, archive_status in rows:
        t = (permit_type or "onbekend").lower()
        by_type[t] = by_type.get(t, 0) + 1
        s = archive_status or "onbekend"
        by_archive[s] = by_archive.get(s, 0) + 1
    return jsonify(total=len(rows), by_type=by_type, by_archive=by_archive)
|
||||
|
||||
|
||||
def _permit_summary(p):
|
||||
return {
|
||||
"id": p.id, "permit_number": p.permit_number,
|
||||
"applicant_name": p.applicant_name, "permit_type": p.permit_type,
|
||||
"location": p.location, "source_file": p.source_file,
|
||||
"source_system": p.source_system, "status": p.status,
|
||||
"archive_status": p.archive_status,
|
||||
"issue_date": p.issue_date.isoformat() if p.issue_date else None,
|
||||
"upload_date": p.upload_date.isoformat() if p.upload_date else None,
|
||||
}
|
||||
|
||||
|
||||
def _permit_to_dict(p):
    """Build the full dict representation of permit *p*.

    Starts from the summary fields and adds the remaining metadata, the
    archive classification, the extracted text and any error message.
    ``expiry_date`` is rendered with ``isoformat()``, or ``None`` when unset.
    """
    details = _permit_summary(p)
    details["permit_holder_name"] = p.permit_holder_name
    details["issuer_name"] = p.issuer_name
    if p.expiry_date:
        details["expiry_date"] = p.expiry_date.isoformat()
    else:
        details["expiry_date"] = None
    details["applicable_law"] = p.applicable_law
    details["work_type"] = p.work_type
    details["water_type"] = p.water_type
    details["embankment_type"] = p.embankment_type
    details["archive_nomination"] = p.archive_nomination
    details["retention_years"] = p.retention_years
    details["extracted_text"] = p.extracted_text
    details["error_message"] = p.error_message
    return details
|
||||
|
|
|
|||
Loading…
Reference in New Issue