feat(search): add Why endpoint (scores+LLM with rate limiting), results list with highlights + Why buttons, FiltersBar for facets, and Cmd/Ctrl+K command palette; wire filters + search UI into page and use typed SearchResult/SearchHit

This commit is contained in:
nicholai 2025-09-13 13:54:28 -06:00
parent 3264ac9596
commit aa318d0af2
7 changed files with 667 additions and 66 deletions

View File

@ -0,0 +1,151 @@
import * as Sentry from "@sentry/nextjs";
import { NextResponse } from "next/server";
import OpenAI from "openai";
import { env, embeddingsEnabled } from "@/lib/env";
import type { Highlights } from "@/types/search";
// Next.js route segment config: execute this handler on the Node.js runtime.
export const runtime = "nodejs";
// Next.js route segment config: opt this route into dynamic handling on every request
// (see the Next.js route segment config reference for the exact caching semantics).
export const dynamic = "force-dynamic";
/**
 * JSON body accepted by the Why endpoint's POST handler.
 * The handler rejects the request with 400 unless `q`, `doc.id`, `doc.name`
 * and `doc.path` are all present and non-empty.
 */
type WhyBody = {
// The user's search query; trimmed by the handler before use.
q: string;
// The search hit being explained.
doc: {
id: string;
name: string;
path: string;
// Optional metadata; only forwarded to the LLM prompt payload.
mimeType?: string;
sizeBytes?: number;
};
// Highlight fragments for the hit; the first two fragments per field are quoted in the scores explanation.
highlights?: Highlights;
// Optional scores; included in the explanation only when they are numbers.
bm25Score?: number;
vectorScore?: number;
// "scores" builds the explanation locally without an LLM call; the handler defaults to "llm" when omitted.
mode?: "llm" | "scores";
};
/**
 * Serializes `data` as a JSON response via `NextResponse.json`.
 *
 * @param data Payload to serialize.
 * @param init Optional status code and other response init options (e.g. headers).
 * @returns The response produced by `NextResponse.json`.
 */
function json<T>(data: T, init?: { status?: number } & ResponseInit) {
  const response = NextResponse.json(data, init);
  return response;
}
// Simple in-memory rate limiter (fixed window, per client key).
// Note: this is best-effort and not distributed; sufficient for local/dev and small deployments.
const BUCKET: Map<string, { count: number; resetAt: number }> = new Map();
// Maximum number of requests allowed per client key within one window.
const LIMIT = 5;
// Window length in milliseconds.
const WINDOW_MS = 60_000;
// Earliest time (epoch ms) at which the next sweep of expired entries may run.
let nextSweepAt = 0;

/**
 * Derives the rate-limit key for a request.
 *
 * Uses the first comma-separated entry of `x-forwarded-for`, then `x-real-ip`,
 * and finally the literal "local" when neither header yields a usable value.
 * NOTE(review): these headers are client-settable unless a trusted proxy
 * overwrites them, so the key is only as trustworthy as the deployment's proxy.
 */
function keyFromRequest(req: Request) {
  const forwarded =
    req.headers.get("x-forwarded-for") ||
    req.headers.get("x-real-ip") ||
    "local";
  const firstHop = forwarded.split(",")[0]?.trim();
  // An empty first entry (e.g. a header starting with ",") must not become the key "".
  return firstHop || "local";
}

/**
 * Removes entries whose window has ended. Runs at most once per WINDOW_MS so the
 * map cannot grow without bound while keeping the per-request cost negligible.
 */
function pruneExpired(now: number) {
  if (now < nextSweepAt) return;
  nextSweepAt = now + WINDOW_MS;
  for (const [key, entry] of BUCKET) {
    if (now >= entry.resetAt) BUCKET.delete(key);
  }
}

/**
 * Counts the request against its client's current window.
 *
 * @returns `ok` (whether the request is allowed), `remaining` (requests left in
 *          the window) and `resetAt` (epoch ms at which the window ends).
 */
function checkRateLimit(req: Request) {
  const now = Date.now();
  pruneExpired(now);

  const key = keyFromRequest(req);
  const entry = BUCKET.get(key);

  // No window yet, or the previous one has lapsed: start a fresh window.
  if (!entry || now >= entry.resetAt) {
    const resetAt = now + WINDOW_MS;
    BUCKET.set(key, { count: 1, resetAt });
    return { ok: true, remaining: LIMIT - 1, resetAt };
  }

  if (entry.count >= LIMIT) {
    return { ok: false, remaining: 0, resetAt: entry.resetAt };
  }

  // The stored object is mutated in place, so no re-insert into the map is needed.
  entry.count += 1;
  return { ok: true, remaining: LIMIT - entry.count, resetAt: entry.resetAt };
}
/**
 * Handles POST requests asking why a search hit matched a query.
 *
 * Flow:
 *  1. Apply the per-client rate limit (429 with `Retry-After` when exceeded).
 *  2. Validate the JSON body (400 when `q`, `doc.id`, `doc.name` or `doc.path` is missing).
 *  3. In "scores" mode, or when `embeddingsEnabled` is falsy, build a plain-text
 *     explanation from the supplied scores and highlights without calling an LLM.
 *  4. Otherwise ask the chat model for a short bullet-point explanation.
 *
 * The body is untrusted input, so every field is type-checked before a string or
 * array method is called on it; malformed payloads yield 400 (or are skipped)
 * rather than surfacing as a 500 from a TypeError.
 *
 * @returns JSON `{ explanation }` on success, or `{ error, message }` with status 400/429/500.
 */
export async function POST(req: Request) {
  try {
    const rate = checkRateLimit(req);
    if (!rate.ok) {
      // Clamp at 0: the window can lapse between the limiter check and this computation.
      const retryAfterSec = Math.max(0, Math.ceil((rate.resetAt - Date.now()) / 1000));
      return json(
        { error: "rate_limited", message: "Too many requests for Why. Try again soon." },
        { status: 429, headers: { "Retry-After": String(retryAfterSec) } },
      );
    }

    // Invalid JSON falls back to {}; valid JSON that is not an object (e.g. `null`, `5`) does too.
    const parsed: unknown = await req.json().catch(() => ({}));
    const body: Partial<WhyBody> =
      typeof parsed === "object" && parsed !== null ? (parsed as Partial<WhyBody>) : {};

    const q = typeof body.q === "string" ? body.q.trim() : "";
    const doc = body.doc;
    const mode = body.mode || "llm";
    if (!q || !doc?.id || !doc?.name || !doc?.path) {
      return json({ error: "bad_request", message: "q, doc.id, doc.name, doc.path are required" }, { status: 400 });
    }

    // Fast "scores/highlights" explanation without LLM
    if (mode === "scores" || !embeddingsEnabled) {
      const lines: string[] = [];
      lines.push(`Query matched document "${doc.name}" at ${doc.path}.`);
      if (typeof body.bm25Score === "number") lines.push(`- BM25 relevance score: ${body.bm25Score.toFixed(3)}`);
      if (typeof body.vectorScore === "number") lines.push(`- Vector similarity: ${body.vectorScore.toFixed(3)}`);

      const hl = body.highlights;
      // Quote at most two fragments per field; ignore anything that is not a non-empty array.
      const pick = (arr?: unknown) =>
        Array.isArray(arr) && arr.length ? arr.slice(0, 2).join(" … ") : undefined;
      const nameHl = pick(hl?.name);
      const titleHl = pick(hl?.title);
      const contentHl = pick(hl?.content);
      if (nameHl) lines.push(`- Matched name: ${nameHl}`);
      if (titleHl) lines.push(`- Matched title: ${titleHl}`);
      if (contentHl) lines.push(`- Matched content: ${contentHl}`);

      return json({ explanation: lines.join("\n") });
    }

    // LLM explanation path
    // NOTE(review): OPENAI_EMBEDDING_MODEL is required here although the chat call below
    // uses a hard-coded model name; confirm whether that coupling is intended.
    if (!env.OPENAI_API_KEY || !env.OPENAI_API_BASE || !env.OPENAI_EMBEDDING_MODEL) {
      // embeddingsEnabled is true only if all three above are set, but double-check
      return json({ error: "not_configured", message: "LLM is not configured on the server." }, { status: 400 });
    }

    const client = new OpenAI({ apiKey: env.OPENAI_API_KEY, baseURL: env.OPENAI_API_BASE });
    const sys =
      "You are a concise assistant that explains why a search result matched a user query. " +
      "Use the provided scores and highlights. Do not invent information. Keep it <= 6 short bullet points.";
    const userPayload = {
      query: q,
      document: {
        id: doc.id,
        name: doc.name,
        path: doc.path,
        mimeType: doc.mimeType,
        sizeBytes: doc.sizeBytes,
      },
      scores: {
        bm25: body.bm25Score,
        vector: body.vectorScore,
      },
      highlights: body.highlights,
    };

    // Wrap the model call in a Sentry span so its latency is traced.
    const result = await Sentry.startSpan(
      { op: "ai.chat", name: "search.why.llm" },
      async () => {
        const resp = await client.chat.completions.create({
          model: "gpt-4o-mini",
          temperature: 0.2,
          max_tokens: 250,
          messages: [
            { role: "system", content: sys },
            {
              role: "user",
              content:
                "Explain succinctly why this result matched the query. Use the highlights if present. " +
                "Return bullet points:\n\n" + JSON.stringify(userPayload, null, 2),
            },
          ],
        });
        // An absent or empty completion becomes an empty explanation.
        const text = resp.choices?.[0]?.message?.content?.toString().trim() || "";
        return text;
      },
    );

    return json({ explanation: result });
  } catch (error) {
    Sentry.captureException(error);
    // NOTE(review): the raw error message is returned to the client; confirm this is acceptable.
    return json(
      { error: "why_failed", message: error instanceof Error ? error.message : String(error) },
      { status: 500 },
    );
  }
}

View File

@ -14,6 +14,10 @@ import { MarkdownEditor } from "@/components/editor/markdown-editor";
import { toast } from "sonner"; import { toast } from "sonner";
import { TagsDialog } from "@/components/files/tags-dialog"; import { TagsDialog } from "@/components/files/tags-dialog";
import { ModeToggle } from "@/components/theme/mode-toggle"; import { ModeToggle } from "@/components/theme/mode-toggle";
import { SearchResultsList } from "@/components/search/results-list";
import { FiltersBar } from "@/components/search/filters-bar";
import { CommandPalette } from "@/components/search/command-palette";
import type { SearchResult, SearchHit, FacetFilters } from "@/types/search";
type FilesListResponse = { type FilesListResponse = {
total: number; total: number;
@ -31,24 +35,6 @@ type FilesListResponse = {
}>; }>;
}; };
type SearchResult = {
total: number;
tookMs: number;
hits: Array<{
score: number;
bm25Score?: number;
vectorScore?: number;
doc: {
id: string;
name: string;
path: string;
parentPath?: string;
sizeBytes: number;
mimeType: string;
etag?: string;
};
}>;
};
async function fetchFiles(path?: string, page = 1, perPage = 50) { async function fetchFiles(path?: string, page = 1, perPage = 50) {
const url = new URL("/api/files/list", window.location.origin); const url = new URL("/api/files/list", window.location.origin);
@ -61,11 +47,17 @@ async function fetchFiles(path?: string, page = 1, perPage = 50) {
return data; return data;
} }
async function executeSearch(q: string, semantic: boolean, page: number, perPage: number) { async function executeSearch(
q: string,
semantic: boolean,
page: number,
perPage: number,
filters: FacetFilters,
): Promise<SearchResult> {
const res = await fetch("/api/search/query", { const res = await fetch("/api/search/query", {
method: "POST", method: "POST",
headers: { "content-type": "application/json" }, headers: { "content-type": "application/json" },
body: JSON.stringify({ q, semantic, page, perPage }), body: JSON.stringify({ q, filters, semantic, page, perPage }),
}); });
if (!res.ok) throw new Error(`Search failed: ${res.status}`); if (!res.ok) throw new Error(`Search failed: ${res.status}`);
const data = (await res.json()) as SearchResult; const data = (await res.json()) as SearchResult;
@ -81,6 +73,8 @@ export default function Home() {
const [q, setQ] = React.useState(""); const [q, setQ] = React.useState("");
const [semantic, setSemantic] = React.useState(false); const [semantic, setSemantic] = React.useState(false);
const [filters, setFilters] = React.useState<FacetFilters>({});
const [paletteOpen, setPaletteOpen] = React.useState(false);
const searching = q.trim().length > 0; const searching = q.trim().length > 0;
// Editor state // Editor state
@ -95,25 +89,17 @@ export default function Home() {
enabled: !searching, enabled: !searching,
}); });
const searchQuery = useQuery({ const searchQuery = useQuery<SearchResult>({
queryKey: ["search", q, semantic, page, perPage], queryKey: ["search", q, semantic, page, perPage, filters],
queryFn: () => executeSearch(q.trim(), semantic, page, perPage), queryFn: () => executeSearch(q.trim(), semantic, page, perPage, filters),
enabled: searching, enabled: searching,
}); });
const searchHits: SearchHit[] = React.useMemo(() => {
return (searchQuery.data?.hits ?? []) as SearchHit[];
}, [searchQuery.data]);
const files: FileRow[] = React.useMemo(() => { const files: FileRow[] = React.useMemo(() => {
if (searching) {
const hits = searchQuery.data?.hits ?? [];
return hits.map((h) => ({
id: h.doc.id,
name: h.doc.name,
path: h.doc.path,
parentPath: h.doc.parentPath,
sizeBytes: h.doc.sizeBytes,
mimeType: h.doc.mimeType,
etag: h.doc.etag,
}));
}
const items = filesQuery.data?.items ?? []; const items = filesQuery.data?.items ?? [];
return items.map((it) => ({ return items.map((it) => ({
id: it.id, id: it.id,
@ -124,7 +110,7 @@ export default function Home() {
mimeType: it.mimeType, mimeType: it.mimeType,
etag: it.etag, etag: it.etag,
})); }));
}, [filesQuery.data, searchQuery.data, searching]); }, [filesQuery.data]);
function handleDownload(item: FileRow) { function handleDownload(item: FileRow) {
const url = new URL("/api/files/download", window.location.origin); const url = new URL("/api/files/download", window.location.origin);
@ -305,6 +291,19 @@ export default function Home() {
setTagsPath(item.path); setTagsPath(item.path);
} }
// Helpers for search results list (path-only operations)
function downloadByPath(p: string) {
const url = new URL("/api/files/download", window.location.origin);
url.searchParams.set("path", p);
window.open(url.toString(), "_blank", "noopener,noreferrer");
}
function openByPath(p: string) {
downloadByPath(p);
}
function tagsByPath(p: string) {
setTagsPath(p);
}
function handleUploaded() { function handleUploaded() {
if (!searching) { if (!searching) {
filesQuery.refetch(); filesQuery.refetch();
@ -361,6 +360,12 @@ export default function Home() {
<UploadDialog currentPath={path} onUploaded={handleUploaded} /> <UploadDialog currentPath={path} onUploaded={handleUploaded} />
</section> </section>
{searching ? (
<section className="p-3 border-b">
<FiltersBar value={filters} onChange={setFilters} />
</section>
) : null}
<section className="p-3 flex-1 overflow-auto"> <section className="p-3 flex-1 overflow-auto">
{searching ? ( {searching ? (
<div className="text-xs text-muted-foreground mb-2"> <div className="text-xs text-muted-foreground mb-2">
@ -369,6 +374,15 @@ export default function Home() {
: `Found ${searchQuery.data?.total ?? 0} in ${searchQuery.data?.tookMs ?? 0}ms`} : `Found ${searchQuery.data?.total ?? 0} in ${searchQuery.data?.tookMs ?? 0}ms`}
</div> </div>
) : null} ) : null}
{searching ? (
<SearchResultsList
q={q.trim()}
hits={searchHits}
onOpenPath={openByPath}
onDownloadPath={downloadByPath}
onTagsPath={tagsByPath}
/>
) : (
<FileTable <FileTable
items={files} items={files}
onOpen={handleOpen} onOpen={handleOpen}
@ -379,6 +393,7 @@ export default function Home() {
onDelete={handleDelete} onDelete={handleDelete}
onTags={handleTags} onTags={handleTags}
/> />
)}
</section> </section>
</main> </main>
@ -405,6 +420,15 @@ export default function Home() {
if (!searching) filesQuery.refetch(); if (!searching) filesQuery.refetch();
}} }}
/> />
{/* Command palette (Cmd/Ctrl+K) */}
<CommandPalette
open={paletteOpen}
onOpenChange={setPaletteOpen}
onSubmit={(text) => {
setPage(1);
setQ(text);
}}
/>
</div> </div>
); );
} }

View File

@ -0,0 +1,62 @@
"use client";
import * as React from "react";
import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import { Button } from "@/components/ui/button";
/**
 * Modal search box that opens on Cmd/Ctrl+K.
 *
 * @param open         Whether the dialog is currently shown (controlled by the parent).
 * @param onOpenChange Called with the requested open state (shortcut, dialog dismissal, submit).
 * @param onSubmit     Called with the trimmed query when the form is submitted with non-empty text.
 */
export function CommandPalette({
  open,
  onOpenChange,
  onSubmit,
}: {
  open: boolean;
  onOpenChange: (open: boolean) => void;
  onSubmit: (q: string) => void;
}) {
  const [draft, setDraft] = React.useState("");

  // Register the global Cmd/Ctrl+K shortcut for as long as the component is mounted.
  React.useEffect(() => {
    function handleKeydown(event: KeyboardEvent) {
      const modifierHeld = event.metaKey || event.ctrlKey;
      if (!modifierHeld) return;
      if (event.key !== "k" && event.key !== "K") return;
      event.preventDefault();
      onOpenChange(true);
    }

    window.addEventListener("keydown", handleKeydown);
    return () => {
      window.removeEventListener("keydown", handleKeydown);
    };
  }, [onOpenChange]);

  // Submit the trimmed query, then close the dialog and clear the input.
  const submitQuery = (event: React.FormEvent) => {
    event.preventDefault();
    const trimmed = draft.trim();
    if (trimmed.length === 0) return;
    onSubmit(trimmed);
    onOpenChange(false);
    setDraft("");
  };

  return (
    <Dialog open={open} onOpenChange={(next) => onOpenChange(next)}>
      <DialogContent className="sm:max-w-xl">
        <DialogHeader>
          <DialogTitle>Search (Cmd/Ctrl+K)</DialogTitle>
        </DialogHeader>
        <form onSubmit={submitQuery} className="flex items-center gap-2">
          <Input
            autoFocus
            placeholder="Type your query…"
            value={draft}
            onChange={(event) => setDraft(event.target.value)}
          />
          <Button type="submit">Search</Button>
        </form>
        <div className="text-xs text-muted-foreground mt-2">
          Tip: Toggle semantic search using the checkbox on the main toolbar.
        </div>
      </DialogContent>
    </Dialog>
  );
}

View File

@ -0,0 +1,164 @@
"use client";
import * as React from "react";
import { Input } from "@/components/ui/input";
import { Button } from "@/components/ui/button";
import type { FacetFilters } from "@/types/search";
import type { PathId } from "@/lib/paths";
/** Raw text of the free-form inputs, kept exactly as typed until the user applies. */
type FilterTextDrafts = {
  types: string;
  owner: string;
  tags: string;
  pathPrefix: string;
};

/** Builds the editable text for the free-form inputs from applied filters. */
function textDraftsFrom(v: FacetFilters): FilterTextDrafts {
  return {
    types: toCSV(v.types),
    owner: toCSV(v.owner),
    tags: toCSV(v.tags),
    pathPrefix: typeof v.pathPrefix === "string" ? v.pathPrefix : "",
  };
}

/**
 * Editable facet filter bar.
 *
 * Edits are held in local state and only reported through `onChange` when the
 * user presses Apply (normalized) or Clear (an empty filter object).
 *
 * The CSV and path inputs keep the raw text the user typed and are parsed only
 * on Apply. Parsing on every keystroke would strip a trailing comma or space
 * immediately, making it impossible to type a second CSV value or a path
 * containing a space.
 *
 * @param value    Currently applied filters; local state is re-synced whenever this changes.
 * @param onChange Receives the normalized filters on Apply, or `{}` on Clear.
 */
export function FiltersBar({
  value,
  onChange,
}: {
  value: FacetFilters;
  onChange: (v: FacetFilters) => void;
}) {
  // Structured fields (dates, sizes) are edited directly on a FacetFilters copy.
  const [local, setLocal] = React.useState<FacetFilters>(value);
  // Free-form fields are edited as raw text.
  const [text, setText] = React.useState<FilterTextDrafts>(() => textDraftsFrom(value));

  React.useEffect(() => {
    setLocal(value);
    setText(textDraftsFrom(value));
  }, [value]);

  function apply() {
    onChange({
      ...local,
      // normalize empty strings → undefined | []
      types: parseCSV(text.types),
      owner: parseCSV(text.owner),
      tags: parseCSV(text.tags),
      dateFrom: normalizeEmpty(local.dateFrom),
      dateTo: normalizeEmpty(local.dateTo),
      pathPrefix: (text.pathPrefix.trim() || undefined) as PathId | undefined,
      sizeMinBytes: normalizeNumber(local.sizeMinBytes),
      sizeMaxBytes: normalizeNumber(local.sizeMaxBytes),
    });
  }

  function clearAll() {
    onChange({});
  }

  return (
    <div className="flex flex-wrap items-end gap-2 border rounded-md p-2 bg-muted/30">
      <div className="flex flex-col gap-1">
        <label className="text-xs text-muted-foreground">Types (CSV)</label>
        <Input
          placeholder="image/, application/pdf"
          value={text.types}
          onChange={(e) => setText((p) => ({ ...p, types: e.target.value }))}
          className="w-56"
        />
      </div>
      <div className="flex flex-col gap-1">
        <label className="text-xs text-muted-foreground">Owner (CSV)</label>
        <Input
          placeholder="admin, user2"
          value={text.owner}
          onChange={(e) => setText((p) => ({ ...p, owner: e.target.value }))}
          className="w-40"
        />
      </div>
      <div className="flex flex-col gap-1">
        <label className="text-xs text-muted-foreground">Tags (CSV)</label>
        <Input
          placeholder="design, approved"
          value={text.tags}
          onChange={(e) => setText((p) => ({ ...p, tags: e.target.value }))}
          className="w-44"
        />
      </div>
      <div className="flex flex-col gap-1">
        <label className="text-xs text-muted-foreground">Path prefix</label>
        <Input
          placeholder="/remote.php/dav/files/admin/Projects"
          value={text.pathPrefix}
          onChange={(e) => setText((p) => ({ ...p, pathPrefix: e.target.value }))}
          className="w-[28rem]"
        />
      </div>
      <div className="flex flex-col gap-1">
        <label className="text-xs text-muted-foreground">Date from</label>
        <Input
          type="date"
          value={local.dateFrom ?? ""}
          onChange={(e) => setLocal((p) => ({ ...p, dateFrom: e.target.value }))}
          className="w-40"
        />
      </div>
      <div className="flex flex-col gap-1">
        <label className="text-xs text-muted-foreground">Date to</label>
        <Input
          type="date"
          value={local.dateTo ?? ""}
          onChange={(e) => setLocal((p) => ({ ...p, dateTo: e.target.value }))}
          className="w-40"
        />
      </div>
      <div className="flex flex-col gap-1">
        <label className="text-xs text-muted-foreground">Min size (bytes)</label>
        <Input
          type="number"
          inputMode="numeric"
          value={local.sizeMinBytes ?? ""}
          onChange={(e) => setLocal((p) => ({ ...p, sizeMinBytes: parseNumber(e.target.value) }))}
          className="w-40"
        />
      </div>
      <div className="flex flex-col gap-1">
        <label className="text-xs text-muted-foreground">Max size (bytes)</label>
        <Input
          type="number"
          inputMode="numeric"
          value={local.sizeMaxBytes ?? ""}
          onChange={(e) => setLocal((p) => ({ ...p, sizeMaxBytes: parseNumber(e.target.value) }))}
          className="w-40"
        />
      </div>
      <div className="ml-auto flex gap-2">
        <Button variant="outline" size="sm" onClick={clearAll}>
          Clear
        </Button>
        <Button size="sm" onClick={apply}>
          Apply
        </Button>
      </div>
    </div>
  );
}
/**
 * Parses a comma-separated string into trimmed, non-empty items.
 *
 * @param v A CSV string, or an already-parsed array (returned unchanged).
 * @returns The items, or `undefined` when the input is missing or blank.
 *          A non-blank string with no items (e.g. ",") yields `[]`.
 */
function parseCSV(v?: string | string[]): string[] | undefined {
  if (Array.isArray(v)) return v;

  const text = v == null ? "" : v.trim();
  if (text.length === 0) return undefined;

  const items: string[] = [];
  for (const piece of text.split(",")) {
    const item = piece.trim();
    if (item) items.push(item);
  }
  return items;
}
/** Joins items with ", " for display; a missing list becomes the empty string. */
function toCSV(v?: string[]) {
  return v == null ? "" : v.join(", ");
}
/**
 * Maps a missing, empty or whitespace-only string to `undefined`.
 * Any other string is returned as given (it is not trimmed).
 */
function normalizeEmpty<T extends string | undefined>(v?: T) {
  if (!v) return undefined;
  return v.trim().length > 0 ? v : undefined;
}
/**
 * Parses the text of a numeric input.
 *
 * @param v Raw input text.
 * @returns The finite number it represents, or `undefined` when the text is
 *          missing, blank, or not a finite number.
 */
function parseNumber(v?: string) {
  // Trim first: Number("  ") is 0, which would turn a blank field into a real bound.
  const text = v?.trim() ?? "";
  if (text === "") return undefined;
  const n = Number(text);
  return Number.isFinite(n) ? n : undefined;
}
/** Returns `n` when it is a finite number; `null`, `undefined`, NaN and ±Infinity become `undefined`. */
function normalizeNumber(n?: number | null) {
  const usable = n != null && Number.isFinite(n);
  return usable ? n : undefined;
}

View File

@ -0,0 +1,159 @@
"use client";
import * as React from "react";
import * as Sentry from "@sentry/nextjs";
import { Button } from "@/components/ui/button";
import type { SearchHit } from "@/types/search";
/** Per-result state of a Why request; the results list keeps one of these per document id. */
type WhyState = {
// True while the request for this result is in flight; both Why buttons are disabled meanwhile.
loading: boolean;
// Which explanation mode was last requested for this result.
mode: "scores" | "llm";
// Explanation text returned by the endpoint (set on success).
text?: string;
// Error message shown in place of an explanation (set on failure).
error?: string;
};
// Splits a highlight fragment on its <mark>/</mark> delimiters, keeping the delimiters as tokens.
const HIGHLIGHT_MARK_TAG = /(<\/?mark>)/;

/**
 * Converts a highlight fragment into React nodes, honouring only `<mark>` / `</mark>`.
 *
 * Fragments are derived from indexed document content, so they must not be
 * injected as HTML: any markup a document contains would then execute in the
 * viewer's browser. Everything other than the mark delimiters is rendered as
 * plain text, which React escapes.
 * NOTE(review): assumes the search backend does not HTML-encode fragments; if it
 * does, entities would be shown literally here — confirm against the highlight config.
 */
function renderHighlightFragment(fragment: string): React.ReactNode[] {
  const nodes: React.ReactNode[] = [];
  let inMark = false;
  fragment.split(HIGHLIGHT_MARK_TAG).forEach((token, index) => {
    if (token === "<mark>") {
      inMark = true;
      return;
    }
    if (token === "</mark>") {
      inMark = false;
      return;
    }
    if (token === "") return;
    nodes.push(
      inMark ? (
        <mark key={index}>{token}</mark>
      ) : (
        <React.Fragment key={index}>{token}</React.Fragment>
      ),
    );
  });
  return nodes;
}

/**
 * Renders search hits with their highlights, scores, per-result actions and an
 * on-demand "Why" explanation fetched from `/api/search/why`.
 *
 * @param q              The query that produced `hits`; sent along with each Why request.
 * @param hits           Hits to render; "No results" is shown when empty.
 * @param onOpenPath     Called with the document path when its name is clicked.
 * @param onDownloadPath Called with the document path when Download is clicked.
 * @param onTagsPath     Called with the document path when Tags is clicked.
 */
export function SearchResultsList({
  q,
  hits,
  onOpenPath,
  onDownloadPath,
  onTagsPath,
}: {
  q: string;
  hits: SearchHit[];
  onOpenPath?: (path: string) => void;
  onDownloadPath?: (path: string) => void;
  onTagsPath?: (path: string) => void;
}) {
  // Why request state, keyed by document id.
  const [why, setWhy] = React.useState<Record<string, WhyState>>({});

  // Posts the payload to the Why endpoint inside a Sentry span; throws with the
  // server-provided message (or a status-based fallback) on a non-2xx response.
  async function fetchWhy(payload: Record<string, unknown>) {
    return Sentry.startSpan(
      { op: "ui.click", name: "search.why" },
      async () => {
        const res = await fetch("/api/search/why", {
          method: "POST",
          headers: { "content-type": "application/json" },
          body: JSON.stringify(payload),
        });
        if (!res.ok) {
          const data = await res.json().catch(() => ({}));
          throw new Error(data?.message || `Why failed (${res.status})`);
        }
        const data = (await res.json()) as { explanation: string };
        return data.explanation;
      },
    );
  }

  // Requests an explanation for one hit and records loading / result / error for it.
  async function handleWhy(hit: SearchHit, mode: "scores" | "llm") {
    const id = hit.doc.id;
    setWhy((prev) => ({ ...prev, [id]: { loading: true, mode } }));
    try {
      const explanation = await fetchWhy({
        q,
        mode,
        doc: {
          id: hit.doc.id,
          name: hit.doc.name,
          path: hit.doc.path,
          mimeType: hit.doc.mimeType,
          sizeBytes: hit.doc.sizeBytes,
        },
        highlights: hit.highlights,
        bm25Score: hit.bm25Score,
        vectorScore: hit.vectorScore,
      });
      setWhy((prev) => ({ ...prev, [id]: { loading: false, mode, text: explanation } }));
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      setWhy((prev) => ({ ...prev, [id]: { loading: false, mode, error: message } }));
      Sentry.captureException(err);
    }
  }

  // Shows at most the first two fragments of a field, joined by an ellipsis.
  const highlight = (parts?: string[]) => {
    if (!parts || parts.length === 0) return null;
    const joined = parts.slice(0, 2).join(" … ");
    return (
      <div className="text-xs text-muted-foreground">{renderHighlightFragment(joined)}</div>
    );
  };

  return (
    <div className="space-y-2">
      {hits.map((h) => {
        const state = why[h.doc.id];
        return (
          <div key={h.doc.id} className="border rounded-md p-2">
            <div className="flex items-center justify-between gap-2">
              <div className="min-w-0">
                <button
                  className="text-left font-medium hover:underline truncate"
                  title={h.doc.path}
                  onClick={() => onOpenPath?.(h.doc.path)}
                >
                  {h.doc.name}
                </button>
                <div className="text-xs text-muted-foreground truncate">{h.doc.path}</div>
              </div>
              <div className="shrink-0 flex items-center gap-2">
                <div className="text-xs text-muted-foreground">
                  {typeof h.bm25Score === "number" ? `BM25 ${h.bm25Score.toFixed(2)}` : null}
                  {typeof h.vectorScore === "number" ? ` · Vec ${h.vectorScore.toFixed(2)}` : null}
                </div>
                <Button variant="secondary" size="sm" onClick={() => onDownloadPath?.(h.doc.path)}>
                  Download
                </Button>
                <Button variant="ghost" size="sm" onClick={() => onTagsPath?.(h.doc.path)}>
                  Tags
                </Button>
                <Button
                  variant="default"
                  size="sm"
                  onClick={() => handleWhy(h, "scores")}
                  disabled={state?.loading}
                  title="Explain using scores/highlights only"
                >
                  {state?.loading && state.mode === "scores" ? "Why…" : "Why (scores)"}
                </Button>
                <Button
                  variant="default"
                  size="sm"
                  onClick={() => handleWhy(h, "llm")}
                  disabled={state?.loading}
                  title="Explain using LLM (rate limited)"
                >
                  {state?.loading && state.mode === "llm" ? "Why…" : "Why (LLM)"}
                </Button>
              </div>
            </div>

            {/* Highlights */}
            <div className="mt-1 space-y-1">
              {highlight(h.highlights?.name)}
              {highlight(h.highlights?.title)}
              {highlight(h.highlights?.content)}
            </div>

            {/* Why output */}
            {state?.text ? (
              <div className="mt-2 bg-muted/40 rounded-md p-2">
                <pre className="whitespace-pre-wrap text-xs">{state.text}</pre>
              </div>
            ) : null}
            {state?.error ? (
              <div className="mt-2 text-xs text-destructive">{state.error}</div>
            ) : null}
          </div>
        );
      })}
      {hits.length === 0 ? (
        <div className="text-sm text-muted-foreground">No results</div>
      ) : null}
    </div>
  );
}

View File

@ -259,17 +259,23 @@ export async function bulkIndex(docs: IndexDocument[]): Promise<void> {
} }
function normalizeHits( function normalizeHits(
hits: Array<{ _id: string; _source: unknown; _score?: number }>, hits: Array<{
): { id: string; _source: unknown; _score: number }[] { _id: string;
_source: unknown;
_score?: number;
highlight?: Record<string, string[]>;
}>,
): { id: string; _source: unknown; _score: number; _highlight?: Record<string, string[]> }[] {
return hits.map((h) => ({ return hits.map((h) => ({
id: h._id, id: h._id,
_source: h._source, _source: h._source,
_score: h._score ?? 0, _score: h._score ?? 0,
_highlight: h.highlight,
})); }));
} }
function toSearchResult( function toSearchResult(
hits: { id: string; _source: unknown; _score: number }[], hits: { id: string; _source: unknown; _score: number; _highlight?: Record<string, string[]> }[],
total: number, total: number,
took: number, took: number,
): SearchResult { ): SearchResult {
@ -281,12 +287,20 @@ function toSearchResult(
bm25Score?: number; bm25Score?: number;
vectorScore?: number; vectorScore?: number;
}; };
const hl = h._highlight;
return { return {
// We store docs in the same shape as IndexDocument; API layer adapts to UI type. // We store docs in the same shape as IndexDocument; API layer adapts to UI type.
doc: src as unknown as DocType, doc: src as unknown as DocType,
score: h._score, score: h._score,
bm25Score: src.bm25Score, bm25Score: src.bm25Score,
vectorScore: src.vectorScore, vectorScore: src.vectorScore,
highlights: hl
? {
name: hl.name,
title: hl.title,
content: hl.content,
}
: undefined,
}; };
}), }),
}; };
@ -339,6 +353,8 @@ export async function bm25Search(
}, },
highlight: q highlight: q
? { ? {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
fields: { fields: {
name: {}, name: {},
title: {}, title: {},
@ -359,7 +375,12 @@ export async function bm25Search(
? res.hits.total ? res.hits.total
: res.hits.total?.value ?? 0; : res.hits.total?.value ?? 0;
const hits = normalizeHits( const hits = normalizeHits(
res.hits.hits as Array<{ _id: string; _source: unknown; _score?: number }>, res.hits.hits as Array<{
_id: string;
_source: unknown;
_score?: number;
highlight?: Record<string, string[]>;
}>,
); );
return toSearchResult(hits, total, res.took ?? 0); return toSearchResult(hits, total, res.took ?? 0);
}, },
@ -441,10 +462,15 @@ export async function hybridSearch(
vector ? knnSearchEmbedding(vector, { filters, k: perPage * 4 }) : Promise.resolve(null), vector ? knnSearchEmbedding(vector, { filters, k: perPage * 4 }) : Promise.resolve(null),
]); ]);
// Normalize and blend // Build maps and normalize
const scores = new Map< const byId = new Map<
string, string,
{ src: DocType; bm25?: number; vec?: number } {
doc: DocType;
bm25?: number;
vec?: number;
highlights?: { name?: string[]; title?: string[]; content?: string[] };
}
>(); >();
let maxBm25 = 0; let maxBm25 = 0;
@ -459,37 +485,45 @@ export async function hybridSearch(
} }
for (const h of bm25.hits) { for (const h of bm25.hits) {
scores.set(h.doc.id, { src: h.doc, bm25: maxBm25 ? h.score / maxBm25 : 0 }); byId.set(h.doc.id, {
doc: h.doc,
bm25: maxBm25 ? h.score / maxBm25 : 0,
highlights: h.highlights,
});
} }
if (knn) { if (knn) {
for (const h of knn.hits) { for (const h of knn.hits) {
const prev = scores.get(h.doc.id); const prev = byId.get(h.doc.id);
const norm = maxVec ? h.score / maxVec : 0; const norm = maxVec ? h.score / maxVec : 0;
if (prev) { if (prev) prev.vec = norm;
prev.vec = norm; else byId.set(h.doc.id, { doc: h.doc, vec: norm });
} else {
scores.set(h.doc.id, { src: h.doc, vec: norm });
}
} }
} }
const blended = Array.from(scores.entries()).map(([id, v]) => { // Blend scores
const blended = Array.from(byId.entries()).map(([id, v]) => {
const b = v.bm25 ?? 0; const b = v.bm25 ?? 0;
const w = v.vec ?? 0; const w = v.vec ?? 0;
const score = alpha * b + (1 - alpha) * w; const score = alpha * b + (1 - alpha) * w;
const srcObj = v.src as unknown as Record<string, unknown>;
return { return {
id, id,
_source: { ...srcObj, bm25Score: b, vectorScore: w }, doc: v.doc,
_score: score, score,
bm25Score: b,
vectorScore: w,
highlights: v.highlights,
}; };
}); });
blended.sort((a, b) => b._score - a._score); blended.sort((a, b) => b.score - a.score);
const pageStart = (page - 1) * perPage; const pageStart = (page - 1) * perPage;
const pageItems = blended.slice(pageStart, pageStart + perPage); const pageItems = blended.slice(pageStart, pageStart + perPage);
return toSearchResult(pageItems, blended.length, 0); return {
total: blended.length,
tookMs: 0,
hits: pageItems,
};
}, },
); );
} }

View File

@ -14,6 +14,12 @@ export interface FacetFilters {
export type SortMode = "relevance" | "modified" | "size"; export type SortMode = "relevance" | "modified" | "size";
export type Highlights = {
name?: string[];
title?: string[];
content?: string[];
};
export interface SearchQuery { export interface SearchQuery {
q: string; q: string;
filters?: FacetFilters; filters?: FacetFilters;
@ -28,6 +34,7 @@ export interface SearchHit {
score: number; score: number;
bm25Score?: number; bm25Score?: number;
vectorScore?: number; vectorScore?: number;
highlights?: Highlights;
} }
export interface SearchResult { export interface SearchResult {