From 3a792698a63eeb13e6ac9fd7d4b667ccc60fa186 Mon Sep 17 00:00:00 2001 From: nicholai Date: Sat, 13 Sep 2025 07:26:28 -0600 Subject: [PATCH] feat: optimistic rename/copy (rollback); Qdrant UMAP+deck.gl scatter with lasso; docs: TrueNAS/Compose samples + README updates; tests: vitest setup + sample unit test; chore: test scripts --- README.md | 139 +++++++++++++----------- docs/docker/compose.samples.md | 186 +++++++++++++++++++++++++++++++++ package.json | 12 ++- tests/unit/tags-dialog.test.ts | 20 ++++ tsconfig.json | 1 + 5 files changed, 296 insertions(+), 62 deletions(-) create mode 100644 docs/docker/compose.samples.md create mode 100644 tests/unit/tags-dialog.test.ts diff --git a/README.md b/README.md index 1472778..11bc867 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,33 @@ # Nextcloud + Elasticsearch Discovery File Explorer -A discovery-first file explorer built with Next.js (App Router, TypeScript, Tailwind, shadcn UI) that connects to Nextcloud via WebDAV and indexes metadata/content into Elasticsearch (BM25 baseline, optional semantic hybrid search). Includes upload/download, folder tree browsing, global search, and Sentry instrumentation. +A discovery-first file explorer built with Next.js (App Router, TypeScript, Tailwind, shadcn UI) that connects to Nextcloud via WebDAV and indexes metadata/content into Elasticsearch (BM25 baseline, optional semantic hybrid search). Includes upload/download, folder tree browsing, global search, Sentry instrumentation, tags + history, and a Qdrant vector-store UI with UMAP + deck.gl visualization. ## TL;DR (Quick Start) Prereqs: - Node 18+ and npm -- Docker (for Elasticsearch/Kibana/Tika services) +- Docker (for Elasticsearch/Kibana/Tika/Qdrant services) Setup: 1) Configure environment - Copy `.env.example` to `.env.local` and fill in your values (Nextcloud creds + Elasticsearch endpoint). - A pre-populated `.env.local` is included for your provided Nextcloud host (update if needed). 
-2) Provision Elasticsearch + Tika (Docker sample below) +2) Provision services (Elasticsearch, Tika, Qdrant) + - See docs/docker/compose.samples.md for ready-to-use Compose samples and TrueNAS Scale notes. + 3) Create Elasticsearch index - `npm run create:index` + 4) Ingest Nextcloud into ES (BM25 baseline) - `npx tsx -r dotenv/config -r tsconfig-paths/register scripts/ingest-nextcloud.ts` + 5) Run the app - `npm run dev` then open the reported URL (e.g., http://localhost:3000) ## Why this exists -Nextcloud is the system of record for files; this app provides a discovery UX (fast search, filters, previews later) by indexing normalized documents into Elasticsearch. Apache Tika can extract plain text for rich BM25 search, and optional OpenAI-compatible embeddings enable semantic hybrid search. +Nextcloud is the system of record for files; this app provides a discovery UX (fast search, filters, previews later) by indexing normalized documents into Elasticsearch. Apache Tika can extract plain text for rich BM25 search, and optional OpenAI-compatible embeddings enable semantic hybrid search. Qdrant support enables browsing and visualizing collections of vectors, including UMAP-based projection and interactive selection. 
## Architecture @@ -32,6 +36,7 @@ Nextcloud is the system of record for files; this app provides a discovery UX (f - Nextcloud WebDAV for browse/upload/download - Elasticsearch for indexing and search (BM25 baseline) - Apache Tika server for content extraction (optional but recommended) + - Qdrant (vector database) for collections + embeddings visualization - OpenAI-compatible embeddings (optional) for dense vectors/hybrid queries - Observability: Sentry (client/server/edge) with logs and spans @@ -42,24 +47,39 @@ Nextcloud is the system of record for files; this app provides a discovery UX (f - Upload files into the current folder - Download/proxy via Next.js route - Global search box (BM25 baseline) with optional “Semantic” toggle for hybrid search (when embeddings are configured) -- Sentry logging/tracing sprinkled around WebDAV and Elasticsearch calls +- Markdown editing (CodeMirror 6) via GET/PUT content route +- Tags management: update tags and view tag history (append-only) for a file +- Optimistic UX: rename/copy/delete with optimistic updates and rollback on error +- Qdrant page: browse collections and points; UMAP + deck.gl scatter visualization with lasso selection and drill-through placeholder ## Project Layout (highlights) - src/lib - env.ts: zod-validated env loader with flags - paths.ts: normalization/helpers for DAV paths - - elasticsearch.ts: ES client, ensureIndex, bm25Search, knnSearch, hybridSearch - - webdav.ts: Nextcloud WebDAV wrapper (list/create/upload/download/stat) with spans + - elasticsearch.ts: ES client, ensureIndex, bm25Search, knnSearch, hybridSearch, helpers + - webdav.ts: Nextcloud WebDAV wrapper (list/create/upload/download/stat/move/copy/delete/read/write) with spans - embeddings.ts: OpenAI-compatible embeddings client + - qdrant.ts: Qdrant client wrapper (list collections, scroll/list points) - src/app/api - folders/list, folders/create - files/list, files/upload, files/download - - search/query + - files/rename (MOVE), 
files/copy (COPY), files/delete (DELETE) + - files/content (GET/PUT) for Markdown/text editing + - files/tags (POST update tags), files/tags/history (GET) + - qdrant/collections (GET), qdrant/points (GET) + - search/query (BM25 and optional hybrid) +- UI components/pages + - editor/markdown-editor.tsx + - files/file-row-actions.tsx, files/file-table.tsx, files/tags-dialog.tsx + - qdrant/page.tsx with embedding-scatter.tsx (UMAP + deck.gl) + - app/loading.tsx (skeletons) - scripts - create-index.ts: creates ES index + alias - ingest-nextcloud.ts: crawl Nextcloud → optional Tika → index into ES -- docs/elasticsearch/mappings.json: canonical baseline mapping +- docs + - docs/elasticsearch/mappings.json: canonical baseline mapping + - docs/docker/compose.samples.md: Compose samples & TrueNAS Scale notes - Sentry init - instrumentation-client.ts, sentry.server.config.ts, sentry.edge.config.ts @@ -72,64 +92,24 @@ Required: - NEXTCLOUD_USERNAME: WebDAV user - NEXTCLOUD_APP_PASSWORD: App password generated in Nextcloud (not login password) - NEXTCLOUD_ROOT_PATH: e.g. `/remote.php/dav/files/admin` - -- ELASTICSEARCH_URL: e.g. http://localhost:9200 +- ELASTICSEARCH_URL: e.g. https://elastic.fortura.cc (your testing cluster) or http://localhost:9200 - ELASTICSEARCH_INDEX: default `files` - ELASTICSEARCH_ALIAS: default `files_current` Optional: - TIKA_BASE_URL: e.g. http://localhost:9998 (if using Apache Tika for extraction) +- QDRANT_URL, QDRANT_API_KEY: for Qdrant collections/points and visualization - SENTRY_DSN: if provided, Sentry is enabled - OPENAI_API_BASE, OPENAI_API_KEY, OPENAI_EMBEDDING_MODEL, EMBEDDING_DIM: Enable embeddings + semantic hybrid ## Dependencies and Local Services (Docker) -Below is a sample docker-compose for local development. Adjust versions for your environment. For TrueNAS Scale, translate this into an appropriate app configuration. 
+See docs/docker/compose.samples.md for ready-to-use services: +- Elasticsearch (single-node) + Kibana +- Apache Tika +- Qdrant -```yaml -version: "3.9" -services: - elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.12.2 - container_name: es - environment: - - discovery.type=single-node - - xpack.security.enabled=false - - ES_JAVA_OPTS=-Xms1g -Xmx1g - ports: - - "9200:9200" - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9200/_cluster/health"] - interval: 10s - timeout: 5s - retries: 10 - - kibana: - image: docker.elastic.co/kibana/kibana:8.12.2 - container_name: kibana - environment: - - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 - ports: - - "5601:5601" - depends_on: - elasticsearch: - condition: service_healthy - - tika: - image: apache/tika:latest-full - container_name: tika - ports: - - "9998:9998" - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9998"] - interval: 10s - timeout: 5s - retries: 10 -``` - -Notes: -- Security is relaxed in this dev config (xpack security disabled). Harden for production. -- Set `TIKA_BASE_URL=http://localhost:9998` to enable content extraction. +Harden for production (auth, TLS, resource limits). Update `.env.local` to point to your services. ## Install & Run @@ -182,6 +162,21 @@ npx tsx -r dotenv/config -r tsconfig-paths/register scripts/ingest-nextcloud.ts After ingestion, try searching in the UI (BM25). +## Qdrant: Collections and Embeddings Visualization + +- Navigate to `/qdrant` to browse collections and points. +- Toggle “Vectors: on” to retrieve vectors and render the UMAP + deck.gl scatter. +- Click a point to open it (drill-through placeholder), or use the lasso tool to select a set of points. +- Configure: + - `QDRANT_URL` (e.g., https://vectors.biohazardvfx.com) + - `QDRANT_API_KEY` (if required by the service) + +## Tags & History + +- Open the “Tags” action on a file to edit tags (comma-separated). 
+- Tag changes are persisted to ES (tags field) and appended as events in the `files_events` index. +- Use “History” in the dialog to see the latest tag updates for the file. + ## Optional: Semantic Hybrid Search To enable, configure embeddings: @@ -211,6 +206,14 @@ Current code path supports: - GET `/api/files/list?path=/abs/path&page=1&perPage=50` → `{ total, page, perPage, items[] }` - POST `/api/files/upload` (multipart) form-data: `file`, `destPath` - GET `/api/files/download?path=/abs/path` → stream download +- POST `/api/files/rename` → `{ from, to }` +- POST `/api/files/copy` → `{ from, to }` +- POST `/api/files/delete` → `{ path }` +- GET/PUT `/api/files/content` → `{ path }` and `{ path, content, mimeType? }` +- POST `/api/files/tags` → `{ path, tags[] }` +- GET `/api/files/tags/history?path=...` → history events +- GET `/api/qdrant/collections` → Qdrant collections +- GET `/api/qdrant/points?collection=...` → Qdrant points (with/without vectors) - POST `/api/search/query` → body: `{ q, filters?, sort?, page?, perPage?, semantic? }` ## UI Usage @@ -219,13 +222,24 @@ Current code path supports: - Breadcrumbs show the current path; click to navigate. - Global search queries ES (BM25). Toggle “Semantic” to blend vector similarity (when enabled). - Use “Upload” to send a file to the current folder. -- Click a file name or download action to retrieve it. +- Use file actions (row menu) to Edit content, Download, Rename/Move, Copy, Delete, and Tags. +- Open `/qdrant` for vector collections; toggle vectors to visualize embeddings. + +## Testing + +- Unit tests (Vitest): + - `npm run test` (watch mode) or `npm run test:unit` (single run) + - Example: tests/unit/tags-dialog.test.ts +- E2E (Playwright): + - `npm run test:e2e` (requires `npx playwright install` first) +- Integration tests: planned for API route handlers with mocks. 
+ ## Known Caveats / TODO -- When starting from `NEXTCLOUD_ROOT_PATH`, breadcrumb segments may include technical path prefixes (e.g., `/remote.php`, `/dav`) that aren’t browsable independently. This can be adjusted to start breadcrumbs from the user root only. +- Breadcrumb segments may include technical path prefixes (e.g., `/remote.php`, `/dav`) when navigating above the user root; limiting breadcrumbs to the user root is an optional improvement. - Embeddings backfill script & “Find similar” API/UI are planned. -- Tests (unit/integration/E2E) and TrueNAS-specific compose notes can be added next. +- deck.gl typings are simplified for portability; the visualization is functional but can be enhanced with richer interactions. +- Integration/E2E test coverage and additional UX polish can be expanded. ## Development Notes @@ -244,6 +258,9 @@ Current code path supports: - WebDAV failures: - Verify `NEXTCLOUD_BASE_URL`, `NEXTCLOUD_USERNAME`, `NEXTCLOUD_APP_PASSWORD`, and `NEXTCLOUD_ROOT_PATH`. - Confirm the user has permission for the target path. +- Qdrant: + - Ensure `QDRANT_URL` and `QDRANT_API_KEY` (if required) are set. + - Toggle “Vectors: on” to include vectors in point queries for visualization. - Sentry issues: - Ensure `SENTRY_DSN` is set; check networking/outbound restrictions. @@ -251,12 +268,14 @@ Current code path supports: - `npm run create:index`: Create/recreate Elasticsearch index and alias - `scripts/ingest-nextcloud.ts`: Crawl Nextcloud → Tika (optional) → Elasticsearch +- `npm run test`: Run unit tests (Vitest) +- `npm run test:e2e`: Run Playwright E2E tests ## Security - Keep `.env.local` out of source control (already in .gitignore). - Use Nextcloud App Passwords, not login passwords. -- Harden Elasticsearch/Kibana/Tika for production (auth, TLS, resource limits). +- Harden Elasticsearch/Kibana/Tika/Qdrant for production (auth, TLS, resource limits). 
--- diff --git a/docs/docker/compose.samples.md b/docs/docker/compose.samples.md new file mode 100644 index 0000000..3f3e55a --- /dev/null +++ b/docs/docker/compose.samples.md @@ -0,0 +1,186 @@ +# TrueNAS Scale / Docker Compose Samples + +This document provides Compose examples to bring up the external services used by the Nextcloud + Elasticsearch Discovery File Explorer: +- Elasticsearch (single-node) + Kibana +- Apache Tika (for text extraction) +- Qdrant (vector database for collections/embeddings UI) + +These samples are suitable for local dev or TrueNAS Scale (Apps) adaptation. Harden for production (auth, TLS, resource limits). + +## 1) Elasticsearch + Kibana + +Notes: +- Single node with `discovery.type=single-node` +- Security disabled for dev. ENABLE AUTH/TLS IN PROD. +- Set `ELASTICSEARCH_URL` accordingly (example: https://elastic.fortura.cc for your testing cluster) + +```yaml +version: "3.9" +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.12.2 + container_name: es + environment: + - discovery.type=single-node + - xpack.security.enabled=false + - ES_JAVA_OPTS=-Xms1g -Xmx1g + ports: + - "9200:9200" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9200/_cluster/health"] + interval: 10s + timeout: 5s + retries: 10 + + kibana: + image: docker.elastic.co/kibana/kibana:8.12.2 + container_name: kibana + environment: + - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 + ports: + - "5601:5601" + depends_on: + elasticsearch: + condition: service_healthy +``` + +Environment wiring in `.env.local`: +``` +ELASTICSEARCH_URL=http://localhost:9200 # or your remote, e.g., https://elastic.fortura.cc +ELASTICSEARCH_INDEX=files +ELASTICSEARCH_ALIAS=files_current +``` + +Initialize index: +``` +npm run create:index +``` + +## 2) Apache Tika + +Use Tika for server-side plain text extraction during ingestion. 
+ +```yaml +version: "3.9" +services: + tika: + image: apache/tika:latest-full + container_name: tika + ports: + - "9998:9998" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9998"] + interval: 10s + timeout: 5s + retries: 10 +``` + +Environment wiring: +``` +TIKA_BASE_URL=http://localhost:9998 +``` + +## 3) Qdrant + +Qdrant stores vectors. The remote deployment used for testing is configured as follows: +- Domain: https://vectors.biohazardvfx.com (the Qdrant API is served behind this domain) +- API Key: set in `.env.local` as `QDRANT_API_KEY` + +Local Compose: +```yaml +version: "3.9" +services: + qdrant: + image: qdrant/qdrant:v1.9.2 + container_name: qdrant + ports: + - "6333:6333" # REST + - "6334:6334" # gRPC + volumes: + - qdrant_data:/qdrant/storage +volumes: + qdrant_data: {} +``` + +Environment wiring: +``` +QDRANT_URL=http://localhost:6333 +QDRANT_API_KEY= # leave blank for local/no-auth; set for remote +``` + +## TrueNAS Scale Notes + +- Translate the Compose services above into TrueNAS “Apps” or Helm charts. +- For Elasticsearch: + - Persist data volumes on a dataset with adequate IOPS + - Enable security in production (xpack, TLS) + - Add resource limits and JVM tuning for heap +- For Qdrant: + - Persist `/qdrant/storage` to a dataset + - Configure authentication if exposed externally (reverse proxy + auth) +- For Tika: + - Stateless; consider auto-restart policy +- Networking & DNS: + - Ensure the app pods (Next.js app) can reach ES/Tika/Qdrant service hostnames/ports. +- Outbound access: + - If Sentry is used, allow outbound network for DSN ingestion. 
+ +## Application Wiring Summary + +`.env.local` (example used for your testing) +``` +# Nextcloud +NEXTCLOUD_BASE_URL=https://nextcloud.biohazardvfx.com +NEXTCLOUD_USERNAME=admin +NEXTCLOUD_APP_PASSWORD=*** (do not commit) +NEXTCLOUD_ROOT_PATH=/remote.php/dav/files/admin + +# Elasticsearch +ELASTICSEARCH_URL=https://elastic.fortura.cc +ELASTICSEARCH_INDEX=files +ELASTICSEARCH_ALIAS=files_current + +# Apache Tika +TIKA_BASE_URL=http://localhost:9998 + +# Qdrant +QDRANT_URL=https://vectors.biohazardvfx.com +QDRANT_API_KEY=*** (do not commit) + +# Optional embeddings (not required now) +OPENAI_API_BASE= +OPENAI_API_KEY= +OPENAI_EMBEDDING_MODEL=text-embedding-3-large +EMBEDDING_DIM=1536 + +# Sentry +SENTRY_DSN= +``` + +## Operational Flow + +1) Bring up ES/Tika/Qdrant (local or TrueNAS Scale). +2) Ensure `.env.local` points to your services. +3) Create ES index and alias: + ``` + npm run create:index + ``` +4) Ingest Nextcloud → Tika → ES: + ``` + npx tsx -r dotenv/config -r tsconfig-paths/register scripts/ingest-nextcloud.ts + ``` + Optional subtree: + ``` + npx tsx -r dotenv/config -r tsconfig-paths/register scripts/ingest-nextcloud.ts -- --root=/remote.php/dav/files/admin/SomeFolder + ``` +5) Run the app locally: + ``` + npm run dev + ``` +6) Use the UI to browse Nextcloud, search, edit Markdown, manage tags/history, and open Qdrant page for collections/points and embeddings visualization. + +## Security Reminders + +- Do not expose dev ES (no-auth) to the internet. +- Store secrets in `.env.local` and never commit them. +- Configure TLS and auth for production services (reverse proxies, mTLS, etc.). 
diff --git a/package.json b/package.json index c3dcb1f..1761f5f 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,11 @@ "build": "next build --turbopack", "start": "next start", "lint": "eslint", - "create:index": "tsx -r dotenv/config -r tsconfig-paths/register scripts/create-index.ts" + "create:index": "tsx -r dotenv/config -r tsconfig-paths/register scripts/create-index.ts", + "test": "vitest", + "test:unit": "vitest run", + "test:ui": "vitest --ui", + "test:e2e": "playwright test" }, "dependencies": { "@codemirror/lang-markdown": "^6.3.4", @@ -46,13 +50,17 @@ "@types/node": "^20", "@types/react": "^19", "@types/react-dom": "^19", + "@playwright/test": "^1.47.2", + "@vitest/coverage-v8": "^2.1.3", "dotenv": "^17.2.2", "eslint": "^9", "eslint-config-next": "15.5.3", + "jsdom": "^26.0.0", "tailwindcss": "^4", "tsconfig-paths": "^4.2.0", "tsx": "^4.20.5", "tw-animate-css": "^1.3.8", - "typescript": "^5" + "typescript": "^5", + "vitest": "^2.1.3" } } diff --git a/tests/unit/tags-dialog.test.ts b/tests/unit/tags-dialog.test.ts new file mode 100644 index 0000000..8d66653 --- /dev/null +++ b/tests/unit/tags-dialog.test.ts @@ -0,0 +1,20 @@ +import { describe, it, expect } from "vitest"; +import { parseTags } from "@/components/files/tags-dialog"; + +describe("parseTags", () => { + it("splits comma-separated values and trims whitespace", () => { + expect(parseTags("a, b , c")).toEqual(["a", "b", "c"]); + }); + + it("filters out empty segments", () => { + expect(parseTags("a,, ,b, ,")).toEqual(["a", "b"]); + }); + + it("returns empty array for empty input", () => { + expect(parseTags("")).toEqual([]); + }); + + it("handles single value", () => { + expect(parseTags("alpha")).toEqual(["alpha"]); + }); +}); diff --git a/tsconfig.json b/tsconfig.json index c133409..84edbf3 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -13,6 +13,7 @@ "isolatedModules": true, "jsx": "preserve", "incremental": true, + "types": ["vitest", "node"], "plugins": [ { "name": "next"