# rag_api.py
"""Minimal RAG API: embed a query, retrieve matches from ChromaDB, synthesize an answer."""
import os

import chromadb
from dotenv import load_dotenv
from fastapi import Body, FastAPI
from openai import OpenAI
from pydantic import BaseModel, Field

load_dotenv()

# ---- setup ----
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
chroma = chromadb.PersistentClient(path="./chromadb")
collection = chroma.get_or_create_collection("lyra_chats")

app = FastAPI(title="Lyra RAG API")


class Query(BaseModel):
    """Request body for /rag/search."""

    query: str
    # Validate at the model layer: Chroma raises on n_results < 1, and very
    # large values only bloat the synthesis prompt with extra context.
    n_results: int = Field(default=5, ge=1, le=50)


@app.post("/rag/search")
def rag_search(q: Query = Body(...)):
    """Embed the query, retrieve the top-k matching chunks, and synthesize a short answer.

    Returns the original query, the LLM-generated answer, and per-match
    metadata with a 300-character excerpt of each retrieved document.
    """
    # embed query
    q_emb = client.embeddings.create(
        model="text-embedding-3-small",
        input=q.query,
    ).data[0].embedding

    # retrieve matches (one query embedding -> results index [0])
    results = collection.query(query_embeddings=[q_emb], n_results=q.n_results)
    docs = results["documents"][0]
    metas = results["metadatas"][0]

    # Empty collection -> nothing to ground an answer on; short-circuit
    # instead of asking the LLM to answer from an empty context.
    if not docs:
        return {"query": q.query, "answer": "No matching documents found.", "results": []}

    context = "\n\n".join(docs)

    # synthesize short answer
    answer = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": "Answer based only on the context below. Be concise and practical.",
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {q.query}",
            },
        ],
    ).choices[0].message.content

    return {
        "query": q.query,
        "answer": answer,
        "results": [
            {
                # Chroma may return None for a document's metadata entry;
                # guard before calling .get() to avoid AttributeError.
                "source": (m or {}).get("source"),
                "title": (m or {}).get("title"),
                "role": (m or {}).get("role"),
                "excerpt": d[:300],
            }
            for d, m in zip(docs, metas)
        ],
    }


@app.get("/health")
def health():
    """Liveness probe plus the current number of stored documents."""
    return {"status": "ok", "collection_count": collection.count()}