From aa318d0af277c13fbdb1ec4523e7094ab3a8ac25 Mon Sep 17 00:00:00 2001 From: nicholai Date: Sat, 13 Sep 2025 13:54:28 -0600 Subject: [PATCH] feat(search): add Why endpoint (scores+LLM with rate limiting), results list with highlights + Why buttons, FiltersBar for facets, and Cmd/Ctrl+K command palette; wire filters + search UI into page and use typed SearchResult/SearchHit --- src/app/api/search/why/route.ts | 151 ++++++++++++++++++++ src/app/page.tsx | 116 +++++++++------ src/components/search/command-palette.tsx | 62 ++++++++ src/components/search/filters-bar.tsx | 164 ++++++++++++++++++++++ src/components/search/results-list.tsx | 159 +++++++++++++++++++++ src/lib/elasticsearch.ts | 74 +++++++--- src/types/search.ts | 7 + 7 files changed, 667 insertions(+), 66 deletions(-) create mode 100644 src/app/api/search/why/route.ts create mode 100644 src/components/search/command-palette.tsx create mode 100644 src/components/search/filters-bar.tsx create mode 100644 src/components/search/results-list.tsx diff --git a/src/app/api/search/why/route.ts b/src/app/api/search/why/route.ts new file mode 100644 index 0000000..ef16695 --- /dev/null +++ b/src/app/api/search/why/route.ts @@ -0,0 +1,151 @@ +import * as Sentry from "@sentry/nextjs"; +import { NextResponse } from "next/server"; +import OpenAI from "openai"; +import { env, embeddingsEnabled } from "@/lib/env"; +import type { Highlights } from "@/types/search"; + +export const runtime = "nodejs"; +export const dynamic = "force-dynamic"; + +type WhyBody = { + q: string; + doc: { + id: string; + name: string; + path: string; + mimeType?: string; + sizeBytes?: number; + }; + highlights?: Highlights; + bm25Score?: number; + vectorScore?: number; + mode?: "llm" | "scores"; +}; + +function json(data: T, init?: { status?: number } & ResponseInit) { + return NextResponse.json(data, init); +} + +// Simple in-memory rate limiter (per minute) +// Note: this is best-effort and not distributed; sufficient for local/dev and small deployments. +const BUCKET: Map = new Map(); +const LIMIT = 5; +const WINDOW_MS = 60_000; + +function keyFromRequest(req: Request) { + const ip = + req.headers.get("x-forwarded-for") || + req.headers.get("x-real-ip") || + "local"; + return String(ip).split(",")[0].trim(); +} + +function checkRateLimit(req: Request) { + const key = keyFromRequest(req); + const now = Date.now(); + const entry = BUCKET.get(key); + if (!entry || now >= entry.resetAt) { + BUCKET.set(key, { count: 1, resetAt: now + WINDOW_MS }); + return { ok: true, remaining: LIMIT - 1, resetAt: now + WINDOW_MS }; + } + if (entry.count >= LIMIT) { + return { ok: false, remaining: 0, resetAt: entry.resetAt }; + } + entry.count += 1; + BUCKET.set(key, entry); + return { ok: true, remaining: LIMIT - entry.count, resetAt: entry.resetAt }; +} + +export async function POST(req: Request) { + try { + const rate = checkRateLimit(req); + if (!rate.ok) { + return json( + { error: "rate_limited", message: "Too many requests for Why. Try again soon." }, + { status: 429, headers: { "Retry-After": String(Math.ceil((rate.resetAt - Date.now()) / 1000)) } }, + ); + } + + const body = (await req.json().catch(() => ({}))) as Partial; + const q = (body.q || "").trim(); + const doc = body.doc; + const mode = body.mode || "llm"; + if (!q || !doc?.id || !doc?.name || !doc?.path) { + return json({ error: "bad_request", message: "q, doc.id, doc.name, doc.path are required" }, { status: 400 }); + } + + // Fast "scores/highlights" explanation without LLM + if (mode === "scores" || !embeddingsEnabled) { + const lines: string[] = []; + lines.push(`Query matched document "${doc.name}" at ${doc.path}.`); + if (typeof body.bm25Score === "number") lines.push(`- BM25 relevance score: ${body.bm25Score.toFixed(3)}`); + if (typeof body.vectorScore === "number") lines.push(`- Vector similarity: ${body.vectorScore.toFixed(3)}`); + const hl = body.highlights; + const pick = (arr?: string[]) => (arr && arr.length ? arr.slice(0, 2).join(" … ") : undefined); + const nameHl = pick(hl?.name); + const titleHl = pick(hl?.title); + const contentHl = pick(hl?.content); + if (nameHl) lines.push(`- Matched name: ${nameHl}`); + if (titleHl) lines.push(`- Matched title: ${titleHl}`); + if (contentHl) lines.push(`- Matched content: ${contentHl}`); + return json({ explanation: lines.join("\n") }); + } + + // LLM explanation path + if (!env.OPENAI_API_KEY || !env.OPENAI_API_BASE || !env.OPENAI_EMBEDDING_MODEL) { + // embeddingsEnabled is true only if all three above are set, but double-check + return json({ error: "not_configured", message: "LLM is not configured on the server." }, { status: 400 }); + } + + const client = new OpenAI({ apiKey: env.OPENAI_API_KEY, baseURL: env.OPENAI_API_BASE }); + + const sys = + "You are a concise assistant that explains why a search result matched a user query. " + + "Use the provided scores and highlights. Do not invent information. Keep it <= 6 short bullet points."; + const userPayload = { + query: q, + document: { + id: doc.id, + name: doc.name, + path: doc.path, + mimeType: doc.mimeType, + sizeBytes: doc.sizeBytes, + }, + scores: { + bm25: body.bm25Score, + vector: body.vectorScore, + }, + highlights: body.highlights, + }; + + const result = await Sentry.startSpan( + { op: "ai.chat", name: "search.why.llm" }, + async () => { + const resp = await client.chat.completions.create({ + model: "gpt-4o-mini", + temperature: 0.2, + max_tokens: 250, + messages: [ + { role: "system", content: sys }, + { + role: "user", + content: + "Explain succinctly why this result matched the query. Use the highlights if present. " + + "Return bullet points:\n\n" + JSON.stringify(userPayload, null, 2), + }, + ], + }); + const text = resp.choices?.[0]?.message?.content?.toString().trim() || ""; + return text; + }, + ); + + return json({ explanation: result }); + } catch (error) { + Sentry.captureException(error); + return json( + { error: "why_failed", message: error instanceof Error ? error.message : String(error) }, + { status: 500 }, + ); + } +} diff --git a/src/app/page.tsx b/src/app/page.tsx index e183eb9..28983d7 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -14,6 +14,10 @@ import { MarkdownEditor } from "@/components/editor/markdown-editor"; import { toast } from "sonner"; import { TagsDialog } from "@/components/files/tags-dialog"; import { ModeToggle } from "@/components/theme/mode-toggle"; +import { SearchResultsList } from "@/components/search/results-list"; +import { FiltersBar } from "@/components/search/filters-bar"; +import { CommandPalette } from "@/components/search/command-palette"; +import type { SearchResult, SearchHit, FacetFilters } from "@/types/search"; type FilesListResponse = { total: number; @@ -31,24 +35,6 @@ type FilesListResponse = { }>; }; -type SearchResult = { - total: number; - tookMs: number; - hits: Array<{ - score: number; - bm25Score?: number; - vectorScore?: number; - doc: { - id: string; - name: string; - path: string; - parentPath?: string; - sizeBytes: number; - mimeType: string; - etag?: string; - }; - }>; -}; async function fetchFiles(path?: string, page = 1, perPage = 50) { const url = new URL("/api/files/list", window.location.origin); @@ -61,11 +47,17 @@ async function fetchFiles(path?: string, page = 1, perPage = 50) { return data; } -async function executeSearch(q: string, semantic: boolean, page: number, perPage: number) { +async function executeSearch( + q: string, + semantic: boolean, + page: number, + perPage: number, + filters: FacetFilters, +): Promise { const res = await fetch("/api/search/query", { method: "POST", headers: { "content-type": "application/json" }, - body: JSON.stringify({ q, semantic, page, perPage }), + body: JSON.stringify({ q, filters, semantic, page, perPage }), }); if (!res.ok) throw new Error(`Search failed: ${res.status}`); const data = (await res.json()) as SearchResult; @@ -81,6 +73,8 @@ export default function Home() { const [q, setQ] = React.useState(""); const [semantic, setSemantic] = React.useState(false); + const [filters, setFilters] = React.useState({}); + const [paletteOpen, setPaletteOpen] = React.useState(false); const searching = q.trim().length > 0; // Editor state @@ -95,25 +89,17 @@ export default function Home() { enabled: !searching, }); - const searchQuery = useQuery({ - queryKey: ["search", q, semantic, page, perPage], - queryFn: () => executeSearch(q.trim(), semantic, page, perPage), + const searchQuery = useQuery({ + queryKey: ["search", q, semantic, page, perPage, filters], + queryFn: () => executeSearch(q.trim(), semantic, page, perPage, filters), enabled: searching, }); + const searchHits: SearchHit[] = React.useMemo(() => { + return (searchQuery.data?.hits ?? []) as SearchHit[]; + }, [searchQuery.data]); + const files: FileRow[] = React.useMemo(() => { - if (searching) { - const hits = searchQuery.data?.hits ?? []; - return hits.map((h) => ({ - id: h.doc.id, - name: h.doc.name, - path: h.doc.path, - parentPath: h.doc.parentPath, - sizeBytes: h.doc.sizeBytes, - mimeType: h.doc.mimeType, - etag: h.doc.etag, - })); - } const items = filesQuery.data?.items ?? []; return items.map((it) => ({ id: it.id, @@ -124,7 +110,7 @@ export default function Home() { mimeType: it.mimeType, etag: it.etag, })); - }, [filesQuery.data, searchQuery.data, searching]); + }, [filesQuery.data]); function handleDownload(item: FileRow) { const url = new URL("/api/files/download", window.location.origin); @@ -305,6 +291,19 @@ export default function Home() { setTagsPath(item.path); } + // Helpers for search results list (path-only operations) + function downloadByPath(p: string) { + const url = new URL("/api/files/download", window.location.origin); + url.searchParams.set("path", p); + window.open(url.toString(), "_blank", "noopener,noreferrer"); + } + function openByPath(p: string) { + downloadByPath(p); + } + function tagsByPath(p: string) { + setTagsPath(p); + } + function handleUploaded() { if (!searching) { filesQuery.refetch(); @@ -361,6 +360,12 @@ export default function Home() { + {searching ? ( +
+ +
+ ) : null} +
{searching ? (
@@ -369,16 +374,26 @@ export default function Home() { : `Found ${searchQuery.data?.total ?? 0} in ${searchQuery.data?.tookMs ?? 0}ms`}
) : null} - + {searching ? ( + + ) : ( + + )}
@@ -405,6 +420,15 @@ export default function Home() { if (!searching) filesQuery.refetch(); }} /> + {/* Command palette (Cmd/Ctrl+K) */} + { + setPage(1); + setQ(text); + }} + /> ); } diff --git a/src/components/search/command-palette.tsx b/src/components/search/command-palette.tsx new file mode 100644 index 0000000..ae54ab6 --- /dev/null +++ b/src/components/search/command-palette.tsx @@ -0,0 +1,62 @@ +"use client"; + +import * as React from "react"; +import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; +import { Button } from "@/components/ui/button"; + +export function CommandPalette({ + open, + onOpenChange, + onSubmit, +}: { + open: boolean; + onOpenChange: (open: boolean) => void; + onSubmit: (q: string) => void; +}) { + const [value, setValue] = React.useState(""); + + // Global shortcut Cmd/Ctrl+K to open + React.useEffect(() => { + const onKey = (e: KeyboardEvent) => { + const isMeta = e.metaKey || e.ctrlKey; + if (isMeta && (e.key === "k" || e.key === "K")) { + e.preventDefault(); + onOpenChange(true); + } + }; + window.addEventListener("keydown", onKey); + return () => window.removeEventListener("keydown", onKey); + }, [onOpenChange]); + + function handleSubmit(e: React.FormEvent) { + e.preventDefault(); + const q = value.trim(); + if (!q) return; + onSubmit(q); + onOpenChange(false); + setValue(""); + } + + return ( + onOpenChange(v)}> + + + Search (Cmd/Ctrl+K) + +
+ setValue(e.target.value)} + /> + +
+
+ Tip: Toggle semantic search using the checkbox on the main toolbar. +
+
+
+ ); +} diff --git a/src/components/search/filters-bar.tsx b/src/components/search/filters-bar.tsx new file mode 100644 index 0000000..e4121ca --- /dev/null +++ b/src/components/search/filters-bar.tsx @@ -0,0 +1,164 @@ +"use client"; + +import * as React from "react"; +import { Input } from "@/components/ui/input"; +import { Button } from "@/components/ui/button"; +import type { FacetFilters } from "@/types/search"; +import type { PathId } from "@/lib/paths"; + +export function FiltersBar({ + value, + onChange, +}: { + value: FacetFilters; + onChange: (v: FacetFilters) => void; +}) { + const [local, setLocal] = React.useState(value); + + React.useEffect(() => { + setLocal(value); + }, [value]); + + function apply() { + const pathPrefix: PathId | undefined = + typeof local.pathPrefix === "string" + ? ((local.pathPrefix.trim() || undefined) as PathId | undefined) + : (local.pathPrefix as PathId | undefined); + + onChange({ + ...local, + // normalize empty strings → undefined | [] + types: parseCSV(local.types as unknown as string), + owner: parseCSV(local.owner as unknown as string), + tags: parseCSV(local.tags as unknown as string), + dateFrom: normalizeEmpty(local.dateFrom), + dateTo: normalizeEmpty(local.dateTo), + pathPrefix, + sizeMinBytes: normalizeNumber(local.sizeMinBytes), + sizeMaxBytes: normalizeNumber(local.sizeMaxBytes), + }); + } + + function clearAll() { + onChange({}); + } + + return ( +
+
+ + setLocal((p) => ({ ...p, types: parseCSV(e.target.value) }))} + className="w-56" + /> +
+
+ + setLocal((p) => ({ ...p, owner: parseCSV(e.target.value) }))} + className="w-40" + /> +
+
+ + setLocal((p) => ({ ...p, tags: parseCSV(e.target.value) }))} + className="w-44" + /> +
+
+ + + setLocal((p) => ({ + ...p, + pathPrefix: (e.target.value.trim() || undefined) as PathId | undefined, + })) + } + className="w-[28rem]" + /> +
+
+ + setLocal((p) => ({ ...p, dateFrom: e.target.value }))} + className="w-40" + /> +
+
+ + setLocal((p) => ({ ...p, dateTo: e.target.value }))} + className="w-40" + /> +
+
+ + setLocal((p) => ({ ...p, sizeMinBytes: parseNumber(e.target.value) }))} + className="w-40" + /> +
+
+ + setLocal((p) => ({ ...p, sizeMaxBytes: parseNumber(e.target.value) }))} + className="w-40" + /> +
+ +
+ + +
+
+ ); +} + +function parseCSV(v?: string | string[]): string[] | undefined { + if (Array.isArray(v)) return v; + const s = (v ?? "").trim(); + if (!s) return undefined; + return s.split(",").map((x) => x.trim()).filter(Boolean); +} + +function toCSV(v?: string[]) { + return (v ?? []).join(", "); +} + +function normalizeEmpty(v?: T) { + return v && v.trim().length ? v : undefined; +} + +function parseNumber(v?: string) { + if (v == null || v === "") return undefined; + const n = Number(v); + return Number.isFinite(n) ? n : undefined; +} + +function normalizeNumber(n?: number | null) { + if (n == null) return undefined; + return Number.isFinite(n) ? n : undefined; +} diff --git a/src/components/search/results-list.tsx b/src/components/search/results-list.tsx new file mode 100644 index 0000000..961f035 --- /dev/null +++ b/src/components/search/results-list.tsx @@ -0,0 +1,159 @@ +"use client"; + +import * as React from "react"; +import * as Sentry from "@sentry/nextjs"; +import { Button } from "@/components/ui/button"; +import type { SearchHit } from "@/types/search"; + +type WhyState = { + loading: boolean; + mode: "scores" | "llm"; + text?: string; + error?: string; +}; + +export function SearchResultsList({ + q, + hits, + onOpenPath, + onDownloadPath, + onTagsPath, +}: { + q: string; + hits: SearchHit[]; + onOpenPath?: (path: string) => void; + onDownloadPath?: (path: string) => void; + onTagsPath?: (path: string) => void; +}) { + const [why, setWhy] = React.useState>({}); + + async function fetchWhy(id: string, payload: Record) { + return Sentry.startSpan( + { op: "ui.click", name: "search.why" }, + async () => { + const res = await fetch("/api/search/why", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(payload), + }); + if (!res.ok) { + const data = await res.json().catch(() => ({})); + throw new Error(data?.message || `Why failed (${res.status})`); + } + const data = (await res.json()) as { explanation: string }; + return data.explanation; + }, + ); + } + + async function handleWhy(hit: SearchHit, mode: "scores" | "llm") { + const id = hit.doc.id; + setWhy((prev) => ({ ...prev, [id]: { loading: true, mode } })); + try { + const explanation = await fetchWhy(id, { + q, + mode, + doc: { + id: hit.doc.id, + name: hit.doc.name, + path: hit.doc.path, + mimeType: hit.doc.mimeType, + sizeBytes: hit.doc.sizeBytes, + }, + highlights: hit.highlights, + bm25Score: hit.bm25Score, + vectorScore: hit.vectorScore, + }); + setWhy((prev) => ({ ...prev, [id]: { loading: false, mode, text: explanation } })); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + setWhy((prev) => ({ ...prev, [id]: { loading: false, mode, error: message } })); + Sentry.captureException(err); + } + } + + const highlight = (parts?: string[]) => { + if (!parts || parts.length === 0) return null; + const html = parts.slice(0, 2).join(" … "); + return ( +
+ ); + }; + + return ( +
+ {hits.map((h) => { + const state = why[h.doc.id]; + return ( +
+
+
+ +
{h.doc.path}
+
+
+
+ {typeof h.bm25Score === "number" ? `BM25 ${h.bm25Score.toFixed(2)}` : null} + {typeof h.vectorScore === "number" ? ` · Vec ${h.vectorScore.toFixed(2)}` : null} +
+ + + + +
+
+ + {/* Highlights */} +
+ {highlight(h.highlights?.name)} + {highlight(h.highlights?.title)} + {highlight(h.highlights?.content)} +
+ + {/* Why output */} + {state?.text ? ( +
+
{state.text}
+
+ ) : null} + {state?.error ? ( +
{state.error}
+ ) : null} +
+ ); + })} + {hits.length === 0 ? ( +
No results
+ ) : null} +
+ ); +} diff --git a/src/lib/elasticsearch.ts b/src/lib/elasticsearch.ts index 3767c11..a50e960 100644 --- a/src/lib/elasticsearch.ts +++ b/src/lib/elasticsearch.ts @@ -259,17 +259,23 @@ export async function bulkIndex(docs: IndexDocument[]): Promise { } function normalizeHits( - hits: Array<{ _id: string; _source: unknown; _score?: number }>, -): { id: string; _source: unknown; _score: number }[] { + hits: Array<{ + _id: string; + _source: unknown; + _score?: number; + highlight?: Record; + }>, +): { id: string; _source: unknown; _score: number; _highlight?: Record }[] { return hits.map((h) => ({ id: h._id, _source: h._source, _score: h._score ?? 0, + _highlight: h.highlight, })); } function toSearchResult( - hits: { id: string; _source: unknown; _score: number }[], + hits: { id: string; _source: unknown; _score: number; _highlight?: Record }[], total: number, took: number, ): SearchResult { @@ -281,12 +287,20 @@ function toSearchResult( bm25Score?: number; vectorScore?: number; }; + const hl = h._highlight; return { // We store docs in the same shape as IndexDocument; API layer adapts to UI type. doc: src as unknown as DocType, score: h._score, bm25Score: src.bm25Score, vectorScore: src.vectorScore, + highlights: hl + ? { + name: hl.name, + title: hl.title, + content: hl.content, + } + : undefined, }; }), }; @@ -339,6 +353,8 @@ export async function bm25Search( }, highlight: q ? { + pre_tags: [""], + post_tags: [""], fields: { name: {}, title: {}, @@ -359,7 +375,12 @@ export async function bm25Search( ? res.hits.total : res.hits.total?.value ?? 0; const hits = normalizeHits( - res.hits.hits as Array<{ _id: string; _source: unknown; _score?: number }>, + res.hits.hits as Array<{ + _id: string; + _source: unknown; + _score?: number; + highlight?: Record; + }>, ); return toSearchResult(hits, total, res.took ?? 0); }, @@ -441,10 +462,15 @@ export async function hybridSearch( vector ? knnSearchEmbedding(vector, { filters, k: perPage * 4 }) : Promise.resolve(null), ]); - // Normalize and blend - const scores = new Map< + // Build maps and normalize + const byId = new Map< string, - { src: DocType; bm25?: number; vec?: number } + { + doc: DocType; + bm25?: number; + vec?: number; + highlights?: { name?: string[]; title?: string[]; content?: string[] }; + } >(); let maxBm25 = 0; @@ -459,37 +485,45 @@ export async function hybridSearch( } for (const h of bm25.hits) { - scores.set(h.doc.id, { src: h.doc, bm25: maxBm25 ? h.score / maxBm25 : 0 }); + byId.set(h.doc.id, { + doc: h.doc, + bm25: maxBm25 ? h.score / maxBm25 : 0, + highlights: h.highlights, + }); } if (knn) { for (const h of knn.hits) { - const prev = scores.get(h.doc.id); + const prev = byId.get(h.doc.id); const norm = maxVec ? h.score / maxVec : 0; - if (prev) { - prev.vec = norm; - } else { - scores.set(h.doc.id, { src: h.doc, vec: norm }); - } + if (prev) prev.vec = norm; + else byId.set(h.doc.id, { doc: h.doc, vec: norm }); } } - const blended = Array.from(scores.entries()).map(([id, v]) => { + // Blend scores + const blended = Array.from(byId.entries()).map(([id, v]) => { const b = v.bm25 ?? 0; const w = v.vec ?? 0; const score = alpha * b + (1 - alpha) * w; - const srcObj = v.src as unknown as Record; return { id, - _source: { ...srcObj, bm25Score: b, vectorScore: w }, - _score: score, + doc: v.doc, + score, + bm25Score: b, + vectorScore: w, + highlights: v.highlights, }; }); - blended.sort((a, b) => b._score - a._score); + blended.sort((a, b) => b.score - a.score); const pageStart = (page - 1) * perPage; const pageItems = blended.slice(pageStart, pageStart + perPage); - return toSearchResult(pageItems, blended.length, 0); + return { + total: blended.length, + tookMs: 0, + hits: pageItems, + }; }, ); } diff --git a/src/types/search.ts b/src/types/search.ts index 013d97e..1bb67f7 100644 --- a/src/types/search.ts +++ b/src/types/search.ts @@ -14,6 +14,12 @@ export interface FacetFilters { export type SortMode = "relevance" | "modified" | "size"; +export type Highlights = { + name?: string[]; + title?: string[]; + content?: string[]; +}; + export interface SearchQuery { q: string; filters?: FacetFilters; @@ -28,6 +34,7 @@ export interface SearchHit { score: number; bm25Score?: number; vectorScore?: number; + highlights?: Highlights; } export interface SearchResult {