From 510c4a5188b9bec8278bd73ab6619a27e08a6aaf Mon Sep 17 00:00:00 2001 From: Dinh Long Nguyen Date: Wed, 8 Oct 2025 16:08:40 +0700 Subject: [PATCH] working attachments --- core/src/browser/extension.ts | 2 + core/src/browser/extensions/index.ts | 5 + core/src/browser/extensions/rag.ts | 32 + core/src/browser/extensions/vector-db.ts | 62 ++ extensions/llamacpp-extension/src/index.ts | 60 +- extensions/rag-extension/package.json | 33 + extensions/rag-extension/rolldown.config.mjs | 14 + extensions/rag-extension/settings.json | 58 ++ extensions/rag-extension/src/env.d.ts | 5 + extensions/rag-extension/src/global.d.ts | 14 + extensions/rag-extension/src/index.ts | 328 ++++++++++ extensions/rag-extension/src/tools.ts | 58 ++ extensions/vector-db-extension/package.json | 32 + .../vector-db-extension/rolldown.config.mjs | 11 + extensions/vector-db-extension/src/index.ts | 58 ++ scripts/download-bin.mjs | 127 ++++ src-tauri/Cargo.lock | 379 ++++++++++- src-tauri/Cargo.toml | 2 + src-tauri/capabilities/default.json | 2 + src-tauri/capabilities/desktop.json | 4 +- src-tauri/plugins/tauri-plugin-rag/.gitignore | 17 + src-tauri/plugins/tauri-plugin-rag/Cargo.toml | 31 + src-tauri/plugins/tauri-plugin-rag/build.rs | 7 + .../tauri-plugin-rag/guest-js/index.ts | 6 + .../plugins/tauri-plugin-rag/package.json | 33 + .../commands/parse_document.toml | 13 + .../permissions/autogenerated/reference.md | 43 ++ .../tauri-plugin-rag/permissions/default.toml | 6 + .../permissions/schemas/schema.json | 318 +++++++++ .../plugins/tauri-plugin-rag/rollup.config.js | 32 + .../plugins/tauri-plugin-rag/src/commands.rs | 12 + .../plugins/tauri-plugin-rag/src/error.rs | 20 + src-tauri/plugins/tauri-plugin-rag/src/lib.rs | 20 + .../plugins/tauri-plugin-rag/src/parser.rs | 258 ++++++++ .../plugins/tauri-plugin-rag/tsconfig.json | 15 + .../plugins/tauri-plugin-vector-db/.gitignore | 17 + .../plugins/tauri-plugin-vector-db/Cargo.toml | 25 + .../plugins/tauri-plugin-vector-db/build.rs | 12 + 
.../tauri-plugin-vector-db/guest-js/index.ts | 106 +++ .../tauri-plugin-vector-db/package.json | 33 + .../autogenerated/commands/chunk_text.toml | 13 + .../commands/create_collection.toml | 13 + .../autogenerated/commands/delete_chunks.toml | 13 + .../commands/delete_collection.toml | 13 + .../autogenerated/commands/get_chunks.toml | 13 + .../autogenerated/commands/get_status.toml | 13 + .../autogenerated/commands/insert_chunks.toml | 13 + .../commands/list_attachments.toml | 14 + .../commands/search_collection.toml | 13 + .../permissions/autogenerated/reference.md | 259 ++++++++ .../permissions/default.toml | 13 + .../permissions/schemas/schema.json | 414 ++++++++++++ .../tauri-plugin-vector-db/rollup.config.js | 32 + .../tauri-plugin-vector-db/src/commands.rs | 167 +++++ .../plugins/tauri-plugin-vector-db/src/db.rs | 612 ++++++++++++++++++ .../tauri-plugin-vector-db/src/error.rs | 23 + .../plugins/tauri-plugin-vector-db/src/lib.rs | 34 + .../tauri-plugin-vector-db/src/state.rs | 17 + .../tauri-plugin-vector-db/src/utils.rs | 27 + .../tauri-plugin-vector-db/tsconfig.json | 15 + src-tauri/src/lib.rs | 4 +- web-app/src/containers/ChatInput.tsx | 288 +++++++-- web-app/src/hooks/useAttachments.ts | 163 ++++- web-app/src/hooks/useThreads.ts | 24 +- web-app/src/hooks/useTools.ts | 27 +- web-app/src/lib/completion.ts | 96 ++- web-app/src/locales/en/settings.json | 7 +- web-app/src/routes/settings/attachments.tsx | 334 +++++----- web-app/src/services/rag/default.ts | 44 +- web-app/src/services/rag/types.ts | 6 +- 70 files changed, 4699 insertions(+), 335 deletions(-) create mode 100644 core/src/browser/extensions/rag.ts create mode 100644 core/src/browser/extensions/vector-db.ts create mode 100644 extensions/rag-extension/package.json create mode 100644 extensions/rag-extension/rolldown.config.mjs create mode 100644 extensions/rag-extension/settings.json create mode 100644 extensions/rag-extension/src/env.d.ts create mode 100644 extensions/rag-extension/src/global.d.ts 
create mode 100644 extensions/rag-extension/src/index.ts create mode 100644 extensions/rag-extension/src/tools.ts create mode 100644 extensions/vector-db-extension/package.json create mode 100644 extensions/vector-db-extension/rolldown.config.mjs create mode 100644 extensions/vector-db-extension/src/index.ts create mode 100644 src-tauri/plugins/tauri-plugin-rag/.gitignore create mode 100644 src-tauri/plugins/tauri-plugin-rag/Cargo.toml create mode 100644 src-tauri/plugins/tauri-plugin-rag/build.rs create mode 100644 src-tauri/plugins/tauri-plugin-rag/guest-js/index.ts create mode 100644 src-tauri/plugins/tauri-plugin-rag/package.json create mode 100644 src-tauri/plugins/tauri-plugin-rag/permissions/autogenerated/commands/parse_document.toml create mode 100644 src-tauri/plugins/tauri-plugin-rag/permissions/autogenerated/reference.md create mode 100644 src-tauri/plugins/tauri-plugin-rag/permissions/default.toml create mode 100644 src-tauri/plugins/tauri-plugin-rag/permissions/schemas/schema.json create mode 100644 src-tauri/plugins/tauri-plugin-rag/rollup.config.js create mode 100644 src-tauri/plugins/tauri-plugin-rag/src/commands.rs create mode 100644 src-tauri/plugins/tauri-plugin-rag/src/error.rs create mode 100644 src-tauri/plugins/tauri-plugin-rag/src/lib.rs create mode 100644 src-tauri/plugins/tauri-plugin-rag/src/parser.rs create mode 100644 src-tauri/plugins/tauri-plugin-rag/tsconfig.json create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/.gitignore create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/Cargo.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/build.rs create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/guest-js/index.ts create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/package.json create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/chunk_text.toml create mode 100644 
src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/create_collection.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/delete_chunks.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/delete_collection.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/get_chunks.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/get_status.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/insert_chunks.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/list_attachments.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/search_collection.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/reference.md create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/default.toml create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/permissions/schemas/schema.json create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/rollup.config.js create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/src/commands.rs create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/src/db.rs create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/src/error.rs create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/src/lib.rs create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/src/state.rs create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/src/utils.rs create mode 100644 src-tauri/plugins/tauri-plugin-vector-db/tsconfig.json diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts index 78f90ba16..d562bb9ea 100644 --- a/core/src/browser/extension.ts +++ b/core/src/browser/extension.ts @@ -11,6 +11,8 @@ export enum ExtensionTypeEnum { 
HuggingFace = 'huggingFace', Engine = 'engine', Hardware = 'hardware', + RAG = 'rag', + VectorDB = 'vectorDB', } export interface ExtensionType { diff --git a/core/src/browser/extensions/index.ts b/core/src/browser/extensions/index.ts index 30c7de216..339bb0d2b 100644 --- a/core/src/browser/extensions/index.ts +++ b/core/src/browser/extensions/index.ts @@ -23,3 +23,8 @@ export { MCPExtension } from './mcp' * Base AI Engines. */ export * from './engines' + +export { RAGExtension, RAG_INTERNAL_SERVER } from './rag' +export type { AttachmentInput } from './rag' +export { VectorDBExtension } from './vector-db' +export type { SearchMode, VectorDBStatus, VectorChunkInput, VectorSearchResult, AttachmentFileInfo } from './vector-db' diff --git a/core/src/browser/extensions/rag.ts b/core/src/browser/extensions/rag.ts new file mode 100644 index 000000000..5df474ce0 --- /dev/null +++ b/core/src/browser/extensions/rag.ts @@ -0,0 +1,32 @@ +import { BaseExtension, ExtensionTypeEnum } from '../extension' +import type { MCPTool, MCPToolCallResult } from '../../types' + +export interface AttachmentInput { + path: string + name?: string + type?: string + size?: number +} + +export const RAG_INTERNAL_SERVER = 'rag-internal' + +/** + * RAG extension base: exposes RAG tools and orchestration API. + */ +export abstract class RAGExtension extends BaseExtension { + type(): ExtensionTypeEnum | undefined { + return ExtensionTypeEnum.RAG + } + + abstract getTools(): Promise + /** + * Lightweight list of tool names for quick routing/lookup. 
+ */ + abstract getToolNames(): Promise + abstract callTool(toolName: string, args: Record): Promise + + abstract ingestAttachments(threadId: string, files: AttachmentInput[]): Promise<{ + filesProcessed: number + chunksInserted: number + } | void> +} diff --git a/core/src/browser/extensions/vector-db.ts b/core/src/browser/extensions/vector-db.ts new file mode 100644 index 000000000..ffbf0b4ab --- /dev/null +++ b/core/src/browser/extensions/vector-db.ts @@ -0,0 +1,62 @@ +import { BaseExtension, ExtensionTypeEnum } from '../extension' + +export type SearchMode = 'auto' | 'ann' | 'linear' + +export interface VectorDBStatus { + ann_available: boolean +} + +export interface VectorChunkInput { + id?: string + text: string + embedding: number[] + metadata?: Record +} + +export interface VectorSearchResult { + id: string + text: string + score?: number + file_id: string + chunk_file_order: number +} + +export interface AttachmentFileInfo { + id: string + name?: string + path?: string + type?: string + size?: number + chunk_count: number +} + +/** + * Vector DB extension base: abstraction over local vector storage and search. 
+ */ +export abstract class VectorDBExtension extends BaseExtension { + type(): ExtensionTypeEnum | undefined { + return ExtensionTypeEnum.VectorDB + } + + abstract getStatus(): Promise + abstract createCollection(name: string, dimension: number): Promise + abstract insertChunks(collection: string, chunks: VectorChunkInput[]): Promise + abstract searchCollection( + collection: string, + query_embedding: number[], + limit: number, + threshold: number, + mode?: SearchMode, + fileIds?: string[] + ): Promise + abstract deleteChunks(collection: string, ids: string[]): Promise + abstract deleteCollection(collection: string): Promise + abstract chunkText(text: string, chunkSize: number, chunkOverlap: number): Promise + abstract listAttachments(collection: string, limit?: number): Promise + abstract getChunks( + collection: string, + fileId: string, + startOrder: number, + endOrder: number + ): Promise +} diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts index 07d49cd53..d45cab3fd 100644 --- a/extensions/llamacpp-extension/src/index.ts +++ b/extensions/llamacpp-extension/src/index.ts @@ -1631,7 +1631,7 @@ export default class llamacpp_extension extends AIEngine { if (cfg.no_kv_offload) args.push('--no-kv-offload') if (isEmbedding) { args.push('--embedding') - args.push('--pooling mean') + args.push('--pooling', 'mean') } else { if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size)) if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict)) @@ -2013,43 +2013,55 @@ export default class llamacpp_extension extends AIEngine { } async embed(text: string[]): Promise { + // Ensure the sentence-transformer model is present let sInfo = await this.findSessionByModel('sentence-transformer-mini') if (!sInfo) { const downloadedModelList = await this.list() - if ( - !downloadedModelList.some( - (model) => model.id === 'sentence-transformer-mini' - ) - ) { + if (!downloadedModelList.some((model) => model.id === 
'sentence-transformer-mini')) { await this.import('sentence-transformer-mini', { modelPath: 'https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/all-MiniLM-L6-v2-ggml-model-f16.gguf?download=true', }) } - sInfo = await this.load('sentence-transformer-mini') + // Load specifically in embedding mode + sInfo = await this.load('sentence-transformer-mini', undefined, true) } - const baseUrl = `http://localhost:${sInfo.port}/v1/embeddings` - const headers = { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${sInfo.api_key}`, + + const attemptRequest = async (session: SessionInfo) => { + const baseUrl = `http://localhost:${session.port}/v1/embeddings` + const headers = { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${session.api_key}`, + } + const body = JSON.stringify({ + input: text, + model: session.model_id, + encoding_format: 'float', + }) + const response = await fetch(baseUrl, { + method: 'POST', + headers, + body, + }) + return response + } + + // First try with the existing session (may have been started without --embedding previously) + let response = await attemptRequest(sInfo) + + // If embeddings endpoint is not available (501), reload with embedding mode and retry once + if (response.status === 501) { + try { + await this.unload('sentence-transformer-mini') + } catch {} + sInfo = await this.load('sentence-transformer-mini', undefined, true) + response = await attemptRequest(sInfo) } - const body = JSON.stringify({ - input: text, - model: sInfo.model_id, - encoding_format: 'float', - }) - const response = await fetch(baseUrl, { - method: 'POST', - headers, - body, - }) if (!response.ok) { const errorData = await response.json().catch(() => null) throw new Error( - `API request failed with status ${response.status}: ${JSON.stringify( - errorData - )}` + `API request failed with status ${response.status}: ${JSON.stringify(errorData)}` ) } const responseData = await response.json() diff --git 
a/extensions/rag-extension/package.json b/extensions/rag-extension/package.json new file mode 100644 index 000000000..5634d5416 --- /dev/null +++ b/extensions/rag-extension/package.json @@ -0,0 +1,33 @@ +{ + "name": "@janhq/rag-extension", + "productName": "RAG Tools", + "version": "0.1.0", + "description": "Registers RAG tools and orchestrates retrieval across parser, embeddings, and vector DB", + "main": "dist/index.js", + "module": "dist/module.js", + "author": "Jan ", + "license": "AGPL-3.0", + "scripts": { + "build": "rolldown -c rolldown.config.mjs", + "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install" + }, + "devDependencies": { + "cpx": "1.5.0", + "rimraf": "6.0.1", + "rolldown": "1.0.0-beta.1", + "typescript": "5.9.2" + }, + "dependencies": { + "@janhq/core": "../../core/package.tgz", + "@janhq/tauri-plugin-rag-api": "link:../../src-tauri/plugins/tauri-plugin-rag", + "@janhq/tauri-plugin-vector-db-api": "link:../../src-tauri/plugins/tauri-plugin-vector-db" + }, + "files": [ + "dist/*", + "package.json" + ], + "installConfig": { + "hoistingLimits": "workspaces" + }, + "packageManager": "yarn@4.5.3" +} diff --git a/extensions/rag-extension/rolldown.config.mjs b/extensions/rag-extension/rolldown.config.mjs new file mode 100644 index 000000000..e9b190546 --- /dev/null +++ b/extensions/rag-extension/rolldown.config.mjs @@ -0,0 +1,14 @@ +import { defineConfig } from 'rolldown' +import settingJson from './settings.json' with { type: 'json' } + +export default defineConfig({ + input: 'src/index.ts', + output: { + format: 'esm', + file: 'dist/index.js', + }, + platform: 'browser', + define: { + SETTINGS: JSON.stringify(settingJson), + }, +}) diff --git a/extensions/rag-extension/settings.json b/extensions/rag-extension/settings.json new file mode 100644 index 000000000..1462db771 --- /dev/null +++ b/extensions/rag-extension/settings.json @@ -0,0 +1,58 @@ +[ + { + "key": "enabled", + "titleKey": 
"settings:attachments.enable", + "descriptionKey": "settings:attachments.enableDesc", + "controllerType": "checkbox", + "controllerProps": { "value": true } + }, + { + "key": "max_file_size_mb", + "titleKey": "settings:attachments.maxFile", + "descriptionKey": "settings:attachments.maxFileDesc", + "controllerType": "input", + "controllerProps": { "value": 20, "type": "number", "min": 1, "max": 200, "step": 1, "textAlign": "right" } + }, + { + "key": "retrieval_limit", + "titleKey": "settings:attachments.topK", + "descriptionKey": "settings:attachments.topKDesc", + "controllerType": "input", + "controllerProps": { "value": 3, "type": "number", "min": 1, "max": 20, "step": 1, "textAlign": "right" } + }, + { + "key": "retrieval_threshold", + "titleKey": "settings:attachments.threshold", + "descriptionKey": "settings:attachments.thresholdDesc", + "controllerType": "input", + "controllerProps": { "value": 0.3, "type": "number", "min": 0, "max": 1, "step": 0.01, "textAlign": "right" } + }, + { + "key": "chunk_size_tokens", + "titleKey": "settings:attachments.chunkSize", + "descriptionKey": "settings:attachments.chunkSizeDesc", + "controllerType": "input", + "controllerProps": { "value": 512, "type": "number", "min": 64, "max": 8192, "step": 64, "textAlign": "right" } + }, + { + "key": "overlap_tokens", + "titleKey": "settings:attachments.chunkOverlap", + "descriptionKey": "settings:attachments.chunkOverlapDesc", + "controllerType": "input", + "controllerProps": { "value": 64, "type": "number", "min": 0, "max": 1024, "step": 16, "textAlign": "right" } + }, + { + "key": "search_mode", + "titleKey": "settings:attachments.searchMode", + "descriptionKey": "settings:attachments.searchModeDesc", + "controllerType": "dropdown", + "controllerProps": { + "value": "auto", + "options": [ + { "name": "Auto (recommended)", "value": "auto" }, + { "name": "ANN (sqlite-vec)", "value": "ann" }, + { "name": "Linear", "value": "linear" } + ] + } + } +] diff --git 
a/extensions/rag-extension/src/env.d.ts b/extensions/rag-extension/src/env.d.ts new file mode 100644 index 000000000..512ce0505 --- /dev/null +++ b/extensions/rag-extension/src/env.d.ts @@ -0,0 +1,5 @@ +import type { SettingComponentProps } from '@janhq/core' +declare global { + const SETTINGS: SettingComponentProps[] +} +export {} diff --git a/extensions/rag-extension/src/global.d.ts b/extensions/rag-extension/src/global.d.ts new file mode 100644 index 000000000..f6fa6968e --- /dev/null +++ b/extensions/rag-extension/src/global.d.ts @@ -0,0 +1,14 @@ +import type { BaseExtension, ExtensionTypeEnum } from '@janhq/core' + +declare global { + interface Window { + core?: { + extensionManager: { + get(type: ExtensionTypeEnum): T | undefined + getByName(name: string): BaseExtension | undefined + } + } + } +} + +export {} diff --git a/extensions/rag-extension/src/index.ts b/extensions/rag-extension/src/index.ts new file mode 100644 index 000000000..21860c471 --- /dev/null +++ b/extensions/rag-extension/src/index.ts @@ -0,0 +1,328 @@ +import { RAGExtension, MCPTool, MCPToolCallResult, ExtensionTypeEnum, VectorDBExtension, type AttachmentInput, type SettingComponentProps, AIEngine } from '@janhq/core' +import './env.d' +import * as ragApi from '@janhq/tauri-plugin-rag-api' +import * as vecdbApi from '@janhq/tauri-plugin-vector-db-api' +import { getRAGTools, RETRIEVE, LIST_ATTACHMENTS, GET_CHUNKS } from './tools' + +export default class RagExtension extends RAGExtension { + private config = { + enabled: true, + retrievalLimit: 3, + retrievalThreshold: 0.3, + chunkSizeTokens: 512, + overlapTokens: 64, + searchMode: 'auto' as 'auto' | 'ann' | 'linear', + maxFileSizeMB: 20, + } + + async onLoad(): Promise { + const settings = structuredClone(SETTINGS) as SettingComponentProps[] + await this.registerSettings(settings) + this.config.enabled = await this.getSetting('enabled', this.config.enabled) + this.config.maxFileSizeMB = await this.getSetting('max_file_size_mb', 
this.config.maxFileSizeMB) + this.config.retrievalLimit = await this.getSetting('retrieval_limit', this.config.retrievalLimit) + this.config.retrievalThreshold = await this.getSetting('retrieval_threshold', this.config.retrievalThreshold) + this.config.chunkSizeTokens = await this.getSetting('chunk_size_tokens', this.config.chunkSizeTokens) + this.config.overlapTokens = await this.getSetting('overlap_tokens', this.config.overlapTokens) + this.config.searchMode = await this.getSetting('search_mode', this.config.searchMode) + + // Check ANN availability on load + try { + const vec = window.core?.extensionManager.get(ExtensionTypeEnum.VectorDB) as unknown as VectorDBExtension + if (vec?.getStatus) { + const status = await vec.getStatus() + console.log('[RAG] Vector DB ANN support:', status.ann_available ? '✓ AVAILABLE' : '✗ NOT AVAILABLE') + if (!status.ann_available) { + console.warn('[RAG] Warning: sqlite-vec not loaded. Collections will use slower linear search.') + } + } + } catch (e) { + console.error('[RAG] Failed to check ANN status:', e) + } + } + + onUnload(): void {} + + async getTools(): Promise { + return getRAGTools(this.config.retrievalLimit) + } + + async getToolNames(): Promise { + // Keep this in sync with getTools() but without building full schemas + return [LIST_ATTACHMENTS, RETRIEVE, GET_CHUNKS] + } + + async callTool(toolName: string, args: Record): Promise { + switch (toolName) { + case LIST_ATTACHMENTS: + return this.listAttachments(args) + case RETRIEVE: + return this.retrieve(args) + case GET_CHUNKS: + return this.getChunks(args) + default: + return { + error: `Unknown tool: ${toolName}`, + content: [{ type: 'text', text: `Unknown tool: ${toolName}` }], + } + } + } + + private async listAttachments(args: Record): Promise { + const threadId = String(args['thread_id'] || '') + if (!threadId) { + return { error: 'Missing thread_id', content: [{ type: 'text', text: 'Missing thread_id' }] } + } + try { + const vec = 
window.core?.extensionManager.get(ExtensionTypeEnum.VectorDB) as unknown as VectorDBExtension + const collection = `attachments_${threadId}` + if (!vec?.listAttachments) { + return { error: 'Vector DB extension missing listAttachments', content: [{ type: 'text', text: 'Vector DB extension missing listAttachments' }] } + } + const files = await vec.listAttachments(collection) + return { + error: '', + content: [ + { + type: 'text', + text: JSON.stringify({ thread_id: threadId, attachments: files || [] }), + }, + ], + } + } catch (e) { + const msg = e instanceof Error ? e.message : String(e) + return { error: msg, content: [{ type: 'text', text: `List attachments failed: ${msg}` }] } + } + } + + private async retrieve(args: Record): Promise { + const threadId = String(args['thread_id'] || '') + const query = String(args['query'] || '') + const fileIds = args['file_ids'] as string[] | undefined + + const s = this.config + const topK = (args['top_k'] as number) || s.retrievalLimit || 3 + const threshold = s.retrievalThreshold ?? 0.3 + const mode: 'auto' | 'ann' | 'linear' = s.searchMode || 'auto' + + if (s.enabled === false) { + return { + error: 'Attachments feature disabled', + content: [ + { + type: 'text', + text: 'Attachments are disabled in Settings. 
Enable them to use retrieval.', + }, + ], + } + } + if (!threadId || !query) { + return { + error: 'Missing thread_id or query', + content: [{ type: 'text', text: 'Missing required parameters' }], + } + } + + try { + // Resolve extensions + const vec = window.core?.extensionManager.get(ExtensionTypeEnum.VectorDB) as unknown as VectorDBExtension + if (!vec?.searchCollection) { + return { + error: 'RAG dependencies not available', + content: [ + { type: 'text', text: 'Vector DB extension not available' }, + ], + } + } + + const queryEmb = (await this.embedTexts([query]))?.[0] + if (!queryEmb) { + return { + error: 'Failed to compute embeddings', + content: [{ type: 'text', text: 'Failed to compute embeddings' }], + } + } + + const collection = `attachments_${threadId}` + const results = await vec.searchCollection( + collection, + queryEmb, + topK, + threshold, + mode, + fileIds + ) + + const payload = { + thread_id: threadId, + query, + citations: results?.map((r: any) => ({ + id: r.id, + text: r.text, + score: r.score, + file_id: r.file_id, + chunk_file_order: r.chunk_file_order + })) ?? 
[], + collection, + mode, + } + return { error: '', content: [{ type: 'text', text: JSON.stringify(payload) }] } + } catch (e) { + console.error('[RAG] Retrieve error:', e) + let msg = 'Unknown error' + if (e instanceof Error) { + msg = e.message + } else if (typeof e === 'string') { + msg = e + } else if (e && typeof e === 'object') { + msg = JSON.stringify(e) + } + return { error: msg, content: [{ type: 'text', text: `Retrieve failed: ${msg}` }] } + } + } + + private async getChunks(args: Record): Promise { + const threadId = String(args['thread_id'] || '') + const fileId = String(args['file_id'] || '') + const startOrder = args['start_order'] as number | undefined + const endOrder = args['end_order'] as number | undefined + + if (!threadId || !fileId || startOrder === undefined || endOrder === undefined) { + return { + error: 'Missing thread_id, file_id, start_order, or end_order', + content: [{ type: 'text', text: 'Missing required parameters' }], + } + } + + try { + const vec = window.core?.extensionManager.get(ExtensionTypeEnum.VectorDB) as unknown as VectorDBExtension + if (!vec?.getChunks) { + return { + error: 'Vector DB extension not available', + content: [{ type: 'text', text: 'Vector DB extension not available' }], + } + } + + const collection = `attachments_${threadId}` + const chunks = await vec.getChunks(collection, fileId, startOrder, endOrder) + + const payload = { + thread_id: threadId, + file_id: fileId, + chunks: chunks || [], + } + return { error: '', content: [{ type: 'text', text: JSON.stringify(payload) }] } + } catch (e) { + const msg = e instanceof Error ? 
e.message : String(e) + return { error: msg, content: [{ type: 'text', text: `Get chunks failed: ${msg}` }] } + } + } + + // Desktop-only ingestion by file paths + async ingestAttachments( + threadId: string, + files: AttachmentInput[] + ): Promise<{ filesProcessed: number; chunksInserted: number } | void> { + if (!threadId || !Array.isArray(files) || files.length === 0) return + + const vec = window.core?.extensionManager.get(ExtensionTypeEnum.VectorDB) as unknown as VectorDBExtension + if (!vec?.createCollection || !vec?.insertChunks) { + throw new Error('Vector DB extension not available') + } + + // Load settings + const s = this.config + const maxSize = (s?.enabled === false ? 0 : s?.maxFileSizeMB) || undefined + const chunkSize = s?.chunkSizeTokens as number | undefined + const chunkOverlap = s?.overlapTokens as number | undefined + + let totalChunks = 0 + let processed = 0 + const collection = `attachments_${threadId}` + let created = false + + for (const f of files) { + if (!f?.path) continue + if (maxSize && f.size && f.size > maxSize * 1024 * 1024) continue + + const type = f.type || 'application/octet-stream' + const chunks = await this.parseAndEmbed(f.path, type, { + chunkSize: chunkSize ?? 512, + chunkOverlap: chunkOverlap ?? 
64, + meta: { file: { name: f.name || f.path.split(/[\\/]/).pop(), path: f.path, type: f.type, size: f.size }, threadId }, + }) + + if (!chunks?.length) { + processed += 1 + continue + } + + // Ensure collection + if (!created) { + await vec.createCollection(collection, chunks[0].embedding.length) + created = true + } + + await vec.insertChunks( + collection, + chunks.map((c) => ({ text: c.text, embedding: c.embedding, metadata: c.metadata })) + ) + totalChunks += chunks.length + processed += 1 + } + + return { filesProcessed: processed, chunksInserted: totalChunks } + } + + onSettingUpdate(key: string, value: T): void { + switch (key) { + case 'enabled': + this.config.enabled = Boolean(value) + break + case 'max_file_size_mb': + this.config.maxFileSizeMB = Number(value) + break + case 'retrieval_limit': + this.config.retrievalLimit = Number(value) + break + case 'retrieval_threshold': + this.config.retrievalThreshold = Number(value) + break + case 'chunk_size_tokens': + this.config.chunkSizeTokens = Number(value) + break + case 'overlap_tokens': + this.config.overlapTokens = Number(value) + break + case 'search_mode': + this.config.searchMode = String(value) as 'auto' | 'ann' | 'linear' + break + } + } + + // Locally implement embedding logic (previously in embeddings-extension) + private async embedTexts(texts: string[]): Promise { + const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & { embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }> } + if (!llm?.embed) throw new Error('llamacpp extension not available') + const res = await llm.embed(texts) + const data: Array<{ embedding: number[]; index: number }> = res?.data || [] + const out: number[][] = new Array(texts.length) + for (const item of data) out[item.index] = item.embedding + return out + } + + private async parseAndEmbed( + filePath: string, + fileType: string, + opts: { chunkSize: number; chunkOverlap: number; meta?: 
Record } + ): Promise }>> { + const text = await ragApi.parseDocument(filePath, fileType) + const chunks = await vecdbApi.chunkText(text, opts.chunkSize, opts.chunkOverlap) + const embeddings = await this.embedTexts(chunks) + return chunks.map((text, i) => ({ + text, + embedding: embeddings[i], + metadata: opts.meta, + })) + } +} diff --git a/extensions/rag-extension/src/tools.ts b/extensions/rag-extension/src/tools.ts new file mode 100644 index 000000000..f2199ed86 --- /dev/null +++ b/extensions/rag-extension/src/tools.ts @@ -0,0 +1,58 @@ +import { MCPTool, RAG_INTERNAL_SERVER } from '@janhq/core' + +// Tool names +export const RETRIEVE = 'retrieve' +export const LIST_ATTACHMENTS = 'list_attachments' +export const GET_CHUNKS = 'get_chunks' + +export function getRAGTools(retrievalLimit: number): MCPTool[] { + const maxTopK = Math.max(1, Number(retrievalLimit ?? 3)) + + return [ + { + name: LIST_ATTACHMENTS, + description: + 'List files attached to the current thread. Thread is inferred automatically; you may optionally provide {"scope":"thread"}. Returns basic file info (name/path).', + inputSchema: { + type: 'object', + properties: { + scope: { type: 'string', enum: ['thread'], description: 'Retrieval scope; currently only thread is supported' }, + }, + required: ['scope'], + }, + server: RAG_INTERNAL_SERVER, + }, + { + name: RETRIEVE, + description: + 'Retrieve relevant snippets from locally attached, indexed documents. Use query only; do not pass raw document content. Thread context is inferred automatically; you may optionally provide {"scope":"thread"}. Use file_ids to search within specific files only.', + inputSchema: { + type: 'object', + properties: { + query: { type: 'string', description: 'User query to search for' }, + top_k: { type: 'number', description: 'Optional: Max citations to return. Adjust as needed.', minimum: 1, maximum: maxTopK, default: retrievalLimit ?? 
3 }, + scope: { type: 'string', enum: ['thread'], description: 'Retrieval scope; currently only thread is supported' }, + file_ids: { type: 'array', items: { type: 'string' }, description: 'Optional: Filter search to specific file IDs from list_attachments' }, + }, + required: ['query', 'scope'], + }, + server: RAG_INTERNAL_SERVER, + }, + { + name: GET_CHUNKS, + description: + 'Retrieve chunks from a file by their order range. For a single chunk, use start_order = end_order. Thread context is inferred automatically; you may optionally provide {"scope":"thread"}.', + inputSchema: { + type: 'object', + properties: { + file_id: { type: 'string', description: 'File ID from list_attachments' }, + start_order: { type: 'number', description: 'Start of chunk range (inclusive, 0-indexed)' }, + end_order: { type: 'number', description: 'End of chunk range (inclusive, 0-indexed). For single chunk, use start_order = end_order.' }, + scope: { type: 'string', enum: ['thread'], description: 'Retrieval scope; currently only thread is supported' }, + }, + required: ['file_id', 'start_order', 'end_order', 'scope'], + }, + server: RAG_INTERNAL_SERVER, + }, + ] +} diff --git a/extensions/vector-db-extension/package.json b/extensions/vector-db-extension/package.json new file mode 100644 index 000000000..183d00d82 --- /dev/null +++ b/extensions/vector-db-extension/package.json @@ -0,0 +1,32 @@ +{ + "name": "@janhq/vector-db-extension", + "productName": "Vector DB", + "version": "0.1.0", + "description": "Vector DB integration using sqlite-vec if available with linear fallback", + "main": "dist/index.js", + "module": "dist/module.js", + "author": "Jan ", + "license": "AGPL-3.0", + "scripts": { + "build": "rolldown -c rolldown.config.mjs", + "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install" + }, + "devDependencies": { + "cpx": "1.5.0", + "rimraf": "6.0.1", + "rolldown": "1.0.0-beta.1", + "typescript": "5.9.2" + }, + "dependencies": { + 
"@janhq/core": "../../core/package.tgz", + "@janhq/tauri-plugin-vector-db-api": "link:../../src-tauri/plugins/tauri-plugin-vector-db" + }, + "files": [ + "dist/*", + "package.json" + ], + "installConfig": { + "hoistingLimits": "workspaces" + }, + "packageManager": "yarn@4.5.3" +} diff --git a/extensions/vector-db-extension/rolldown.config.mjs b/extensions/vector-db-extension/rolldown.config.mjs new file mode 100644 index 000000000..7c4b94017 --- /dev/null +++ b/extensions/vector-db-extension/rolldown.config.mjs @@ -0,0 +1,11 @@ +import { defineConfig } from 'rolldown' + +export default defineConfig({ + input: 'src/index.ts', + output: { + format: 'esm', + file: 'dist/index.js', + }, + platform: 'browser', + define: {}, +}) diff --git a/extensions/vector-db-extension/src/index.ts b/extensions/vector-db-extension/src/index.ts new file mode 100644 index 000000000..5eadad28f --- /dev/null +++ b/extensions/vector-db-extension/src/index.ts @@ -0,0 +1,58 @@ +import { VectorDBExtension, type SearchMode, type VectorDBStatus, type VectorChunkInput, type VectorSearchResult, type AttachmentFileInfo } from '@janhq/core' +import * as vecdb from '@janhq/tauri-plugin-vector-db-api' + +export default class VectorDBExt extends VectorDBExtension { + async onLoad(): Promise { + // no-op + } + + onUnload(): void {} + + async getStatus(): Promise { + return await vecdb.getStatus() as VectorDBStatus + } + + async createCollection(name: string, dimension: number): Promise { + return await vecdb.createCollection(name, dimension) + } + + async insertChunks(collection: string, chunks: VectorChunkInput[]): Promise { + return await vecdb.insertChunks(collection, chunks) + } + + async searchCollection( + collection: string, + query_embedding: number[], + limit: number, + threshold: number, + mode?: SearchMode, + fileIds?: string[] + ): Promise { + return await vecdb.searchCollection(collection, query_embedding, limit, threshold, mode, fileIds) as VectorSearchResult[] + } + + async 
deleteChunks(collection: string, ids: string[]): Promise { + return await vecdb.deleteChunks(collection, ids) + } + + async deleteCollection(collection: string): Promise { + return await vecdb.deleteCollection(collection) + } + + async chunkText(text: string, chunkSize: number, chunkOverlap: number): Promise { + return await vecdb.chunkText(text, chunkSize, chunkOverlap) + } + + async listAttachments(collection: string, limit?: number): Promise { + return await vecdb.listAttachments(collection, limit) as AttachmentFileInfo[] + } + + async getChunks( + collection: string, + fileId: string, + startOrder: number, + endOrder: number + ): Promise { + return await vecdb.getChunks(collection, fileId, startOrder, endOrder) as VectorSearchResult[] + } +} diff --git a/scripts/download-bin.mjs b/scripts/download-bin.mjs index 68f09bf5f..b6ef81bb2 100644 --- a/scripts/download-bin.mjs +++ b/scripts/download-bin.mjs @@ -56,6 +56,75 @@ async function decompress(filePath, targetDir) { } } +async function getJson(url, headers = {}) { + return new Promise((resolve, reject) => { + const opts = new URL(url) + opts.headers = { + 'User-Agent': 'jan-app', + 'Accept': 'application/vnd.github+json', + ...headers, + } + https + .get(opts, (res) => { + if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { + return getJson(res.headers.location, headers).then(resolve, reject) + } + if (res.statusCode !== 200) { + reject(new Error(`GET ${url} failed with status ${res.statusCode}`)) + return + } + let data = '' + res.on('data', (chunk) => (data += chunk)) + res.on('end', () => { + try { + resolve(JSON.parse(data)) + } catch (e) { + reject(e) + } + }) + }) + .on('error', reject) + }) +} + +function matchSqliteVecAsset(assets, platform, arch) { + const osHints = + platform === 'darwin' + ? ['darwin', 'macos', 'apple-darwin'] + : platform === 'win32' + ? ['windows', 'win', 'msvc'] + : ['linux'] + + const archHints = arch === 'arm64' ? 
['arm64', 'aarch64'] : ['x86_64', 'x64', 'amd64'] + const extHints = ['zip', 'tar.gz'] + + const lc = (s) => s.toLowerCase() + const candidates = assets + .filter((a) => a && a.browser_download_url && a.name) + .map((a) => ({ name: lc(a.name), url: a.browser_download_url })) + + // Prefer exact OS + arch matches + let matches = candidates.filter((c) => osHints.some((o) => c.name.includes(o)) && archHints.some((h) => c.name.includes(h)) && extHints.some((e) => c.name.endsWith(e))) + if (matches.length) return matches[0].url + // Fallback: OS only + matches = candidates.filter((c) => osHints.some((o) => c.name.includes(o)) && extHints.some((e) => c.name.endsWith(e))) + if (matches.length) return matches[0].url + // Last resort: any asset with shared library extension inside is unknown here, so pick any zip/tar.gz + matches = candidates.filter((c) => extHints.some((e) => c.name.endsWith(e))) + return matches.length ? matches[0].url : null +} + +async function fetchLatestSqliteVecUrl(platform, arch) { + try { + const rel = await getJson('https://api.github.com/repos/asg017/sqlite-vec/releases/latest') + const url = matchSqliteVecAsset(rel.assets || [], platform, arch) + return url + } catch (e) { + console.log('Failed to query sqlite-vec latest release:', e.message) + return null + } +} + function getPlatformArch() { const platform = os.platform() // 'darwin', 'linux', 'win32' const arch = os.arch() // 'x64', 'arm64', etc. @@ -266,6 +335,64 @@ async function main() { } console.log('UV downloaded.') + // ----- sqlite-vec (optional, ANN acceleration) ----- + try { + const binDir = 'src-tauri/resources/bin' + const platform = os.platform() + const ext = platform === 'darwin' ? 'dylib' : platform === 'win32' ? 
'dll' : 'so' + const targetLibPath = path.join(binDir, `sqlite-vec.${ext}`) + + if (fs.existsSync(targetLibPath)) { + console.log(`sqlite-vec already present at ${targetLibPath}`) + } else { + let sqlvecUrl = await fetchLatestSqliteVecUrl(platform, os.arch()) + // Allow override via env if needed + if ((process.env.SQLVEC_URL || process.env.JAN_SQLITE_VEC_URL) && !sqlvecUrl) { + sqlvecUrl = process.env.SQLVEC_URL || process.env.JAN_SQLITE_VEC_URL + } + if (!sqlvecUrl) { + console.log('Could not determine sqlite-vec download URL; skipping (linear fallback will be used).') + } else { + console.log(`Downloading sqlite-vec from ${sqlvecUrl}...`) + const sqlvecArchive = path.join(tempBinDir, `sqlite-vec-download`) + const guessedExt = sqlvecUrl.endsWith('.zip') ? '.zip' : sqlvecUrl.endsWith('.tar.gz') ? '.tar.gz' : '' + const archivePath = sqlvecArchive + guessedExt + await download(sqlvecUrl, archivePath) + if (!guessedExt) { + console.log('Unknown archive type for sqlite-vec; expecting .zip or .tar.gz') + } else { + await decompress(archivePath, tempBinDir) + // Try to find a shared library in the extracted files + const candidates = [] + function walk(dir) { + for (const entry of fs.readdirSync(dir)) { + const full = path.join(dir, entry) + const stat = fs.statSync(full) + if (stat.isDirectory()) walk(full) + else if (full.endsWith(`.${ext}`)) candidates.push(full) + } + } + walk(tempBinDir) + if (candidates.length === 0) { + console.log('No sqlite-vec shared library found in archive; skipping copy.') + } else { + // Pick the first match and copy/rename to sqlite-vec. 
+ const libSrc = candidates[0] + // Ensure we copy the FILE, not a directory (fs-extra copySync can copy dirs) + if (fs.statSync(libSrc).isFile()) { + fs.copyFileSync(libSrc, targetLibPath) + console.log(`sqlite-vec installed at ${targetLibPath}`) + } else { + console.log(`Found non-file at ${libSrc}; skipping.`) + } + } + } + } + } + } catch (err) { + console.log('sqlite-vec download step failed (non-fatal):', err) + } + console.log('Downloads completed.') } diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index da2ca059e..855542866 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -34,10 +34,12 @@ dependencies = [ "tauri-plugin-log", "tauri-plugin-opener", "tauri-plugin-os", + "tauri-plugin-rag", "tauri-plugin-shell", "tauri-plugin-single-instance", "tauri-plugin-store", "tauri-plugin-updater", + "tauri-plugin-vector-db", "tempfile", "thiserror 2.0.12", "tokio", @@ -63,6 +65,15 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "adobe-cmap-parser" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8abfa9a4688de8fc9f42b3f013b6fffec18ed8a554f5f113577e0b9b3212a3" +dependencies = [ + "pom", +] + [[package]] name = "aes" version = "0.8.4" @@ -598,6 +609,21 @@ dependencies = [ "system-deps", ] +[[package]] +name = "calamine" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47a4d6ea525ea187df1e3a1c4b23469b1cbe60c5bafc1c0ef14b2b8738a8303d" +dependencies = [ + "byteorder", + "codepage", + "encoding_rs", + "log", + "quick-xml 0.31.0", + "serde", + "zip 0.6.6", +] + [[package]] name = "camino" version = "1.1.10" @@ -690,6 +716,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chardetng" 
+version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea" +dependencies = [ + "cfg-if", + "encoding_rs", + "memchr", +] + [[package]] name = "chrono" version = "0.4.41" @@ -715,6 +752,15 @@ dependencies = [ "inout", ] +[[package]] +name = "codepage" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48f68d061bc2828ae826206326e61251aca94c1e4a5305cf52d9138639c918b4" +dependencies = [ + "encoding_rs", +] + [[package]] name = "combine" version = "4.6.7" @@ -941,6 +987,27 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +dependencies = [ + "memchr", +] + [[package]] name = "ctor" version = "0.2.9" @@ -1021,6 +1088,19 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "dashmap" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" +dependencies = [ + "cfg-if", + "hashbrown 0.12.3", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "data-url" version = "0.3.1" @@ -1315,6 +1395,15 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "euclid" +version = "0.20.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad" +dependencies = [ + "num-traits", +] + [[package]] name = "event-listener" version = "5.4.1" @@ -1336,6 +1425,18 @@ dependencies = [ 
"pin-project-lite", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "2.3.0" @@ -1960,6 +2061,9 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.12", +] [[package]] name = "hashbrown" @@ -1967,6 +2071,15 @@ version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "heck" version = "0.4.1" @@ -2009,6 +2122,35 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "html2text" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d21a727ee791bce84d364a69b0f84a5d99f06278adfe4dbd431d475ea28e338" +dependencies = [ + "dashmap", + "html5ever 0.26.0", + "markup5ever 0.11.0", + "tendril", + "thiserror 1.0.69", + "unicode-width", + "xml5ever", +] + +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever 0.11.0", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "html5ever" version = 
"0.29.1" @@ -2017,7 +2159,7 @@ checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" dependencies = [ "log", "mac", - "markup5ever", + "markup5ever 0.14.1", "match_token", ] @@ -2364,6 +2506,15 @@ dependencies = [ "serde", ] +[[package]] +name = "infer" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb33622da908807a06f9513c19b3c1ad50fab3e4137d82a78107d502075aa199" +dependencies = [ + "cfb", +] + [[package]] name = "infer" version = "0.19.0" @@ -2556,7 +2707,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02cb977175687f33fa4afa0c95c112b987ea1443e5a51c8f8ff27dc618270cc2" dependencies = [ "cssparser", - "html5ever", + "html5ever 0.29.1", "indexmap 2.10.0", "selectors", ] @@ -2628,6 +2779,17 @@ dependencies = [ "redox_syscall", ] +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -2665,6 +2827,24 @@ dependencies = [ "value-bag", ] +[[package]] +name = "lopdf" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5c8ecfc6c72051981c0459f75ccc585e7ff67c70829560cda8e647882a9abff" +dependencies = [ + "encoding_rs", + "flate2", + "indexmap 2.10.0", + "itoa", + "log", + "md-5", + "nom", + "rangemap", + "time", + "weezl", +] + [[package]] name = "lru-slab" version = "0.1.2" @@ -2686,6 +2866,20 @@ dependencies = [ "libc", ] +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf 0.10.1", + "phf_codegen 0.10.0", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name 
= "markup5ever" version = "0.14.1" @@ -2717,6 +2911,16 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.5" @@ -2738,6 +2942,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "minisign-verify" version = "0.2.4" @@ -2858,6 +3068,16 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "ntapi" version = "0.4.1" @@ -3426,6 +3646,21 @@ dependencies = [ "sha2", ] +[[package]] +name = "pdf-extract" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbb3a5387b94b9053c1e69d8abfd4dd6dae7afda65a5c5279bc1f42ab39df575" +dependencies = [ + "adobe-cmap-parser", + "encoding_rs", + "euclid", + "lopdf", + "postscript", + "type1-encoding-parser", + "unicode-normalization", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -3472,6 +3707,16 @@ dependencies = [ "phf_shared 0.8.0", ] +[[package]] +name = "phf_codegen" +version = 
"0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", +] + [[package]] name = "phf_codegen" version = "0.11.3" @@ -3635,6 +3880,18 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "pom" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6" + +[[package]] +name = "postscript" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78451badbdaebaf17f053fd9152b3ffb33b516104eacb45e7864aaa9c712f306" + [[package]] name = "potential_utf" version = "0.1.2" @@ -3782,6 +4039,17 @@ dependencies = [ "psl-types", ] +[[package]] +name = "quick-xml" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +dependencies = [ + "encoding_rs", + "memchr", + "serde", +] + [[package]] name = "quick-xml" version = "0.37.5" @@ -3986,6 +4254,12 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rangemap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93e7e49bb0bf967717f7bd674458b3d6b0c5f48ec7e3038166026a69fc22223" + [[package]] name = "raw-window-handle" version = "0.5.2" @@ -4272,6 +4546,20 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "rusqlite" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" +dependencies = [ + "bitflags 2.9.1", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rust-ini" version = "0.21.2" @@ -5483,6 +5771,28 @@ dependencies = [ "thiserror 2.0.12", ] +[[package]] 
+name = "tauri-plugin-rag" +version = "0.1.0" +dependencies = [ + "calamine", + "chardetng", + "csv", + "encoding_rs", + "html2text", + "infer 0.15.0", + "log", + "pdf-extract", + "quick-xml 0.31.0", + "serde", + "serde_json", + "tauri", + "tauri-plugin", + "thiserror 2.0.12", + "tokio", + "zip 0.6.6", +] + [[package]] name = "tauri-plugin-shell" version = "2.3.0" @@ -5547,7 +5857,7 @@ dependencies = [ "flate2", "futures-util", "http 1.3.1", - "infer", + "infer 0.19.0", "log", "minisign-verify", "osakit", @@ -5568,6 +5878,22 @@ dependencies = [ "zip 4.3.0", ] +[[package]] +name = "tauri-plugin-vector-db" +version = "0.1.0" +dependencies = [ + "dirs", + "log", + "rusqlite", + "serde", + "serde_json", + "tauri", + "tauri-plugin", + "thiserror 2.0.12", + "tokio", + "uuid", +] + [[package]] name = "tauri-runtime" version = "2.8.0" @@ -5631,9 +5957,9 @@ dependencies = [ "ctor", "dunce", "glob", - "html5ever", + "html5ever 0.29.1", "http 1.3.1", - "infer", + "infer 0.19.0", "json-patch", "kuchikiki", "log", @@ -6090,6 +6416,15 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "type1-encoding-parser" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d6cc09e1a99c7e01f2afe4953789311a1c50baebbdac5b477ecf78e2e92a5b" +dependencies = [ + "pom", +] + [[package]] name = "typeid" version = "1.0.3" @@ -6160,12 +6495,27 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-normalization" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-segmentation" version = "1.12.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unsafe-libyaml" version = "0.2.11" @@ -6629,6 +6979,12 @@ dependencies = [ "windows-core 0.61.2", ] +[[package]] +name = "weezl" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" + [[package]] name = "winapi" version = "0.3.9" @@ -7227,7 +7583,7 @@ dependencies = [ "dunce", "gdkx11", "gtk", - "html5ever", + "html5ever 0.29.1", "http 1.3.1", "javascriptcore-rs", "jni", @@ -7303,6 +7659,17 @@ version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fd8403733700263c6eb89f192880191f1b83e332f7a20371ddcf421c4a337c7" +[[package]] +name = "xml5ever" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650" +dependencies = [ + "log", + "mac", + "markup5ever 0.11.0", +] + [[package]] name = "yoke" version = "0.8.0" diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 43738b032..c195916e8 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -77,6 +77,8 @@ tauri-plugin-dialog = "2.2.1" tauri-plugin-deep-link = { version = "2", optional = true } tauri-plugin-hardware = { path = "./plugins/tauri-plugin-hardware", optional = true } tauri-plugin-llamacpp = { path = "./plugins/tauri-plugin-llamacpp" } +tauri-plugin-vector-db = { path = "./plugins/tauri-plugin-vector-db" } +tauri-plugin-rag = { path = "./plugins/tauri-plugin-rag" } tauri-plugin-http = { version = "2", features = ["unsafe-headers"] } tauri-plugin-log = "2.0.0-rc" tauri-plugin-opener = 
"2.2.7" diff --git a/src-tauri/capabilities/default.json b/src-tauri/capabilities/default.json index 5c5e7d48d..8d054b0c1 100644 --- a/src-tauri/capabilities/default.json +++ b/src-tauri/capabilities/default.json @@ -22,6 +22,8 @@ "core:webview:allow-create-webview-window", "opener:allow-open-url", "store:default", + "vector-db:default", + "rag:default", { "identifier": "http:default", "allow": [ diff --git a/src-tauri/capabilities/desktop.json b/src-tauri/capabilities/desktop.json index 41be646d3..83a34c4d3 100644 --- a/src-tauri/capabilities/desktop.json +++ b/src-tauri/capabilities/desktop.json @@ -23,6 +23,8 @@ "core:webview:allow-create-webview-window", "opener:allow-open-url", "store:default", + "vector-db:default", + "rag:default", "llamacpp:default", "deep-link:default", "hardware:default", @@ -60,4 +62,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/src-tauri/plugins/tauri-plugin-rag/.gitignore b/src-tauri/plugins/tauri-plugin-rag/.gitignore new file mode 100644 index 000000000..50d8e32e8 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/.gitignore @@ -0,0 +1,17 @@ +/.vs +.DS_Store +.Thumbs.db +*.sublime* +.idea/ +debug.log +package-lock.json +.vscode/settings.json +yarn.lock + +/.tauri +/target +Cargo.lock +node_modules/ + +dist-js +dist diff --git a/src-tauri/plugins/tauri-plugin-rag/Cargo.toml b/src-tauri/plugins/tauri-plugin-rag/Cargo.toml new file mode 100644 index 000000000..340873551 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "tauri-plugin-rag" +version = "0.1.0" +authors = ["Jan "] +description = "Tauri plugin for RAG utilities (document parsing, types)" +license = "MIT" +repository = "https://github.com/menloresearch/jan" +edition = "2021" +rust-version = "1.77.2" +exclude = ["/examples", "/dist-js", "/guest-js", "/node_modules"] +links = "tauri-plugin-rag" + +[dependencies] +tauri = { version = "2.8.5", default-features = false } +serde = { version = "1.0", features = 
["derive"] } +serde_json = "1.0" +thiserror = "2.0" +tokio = { version = "1", features = ["full"] } +log = "0.4" +pdf-extract = "0.7" +zip = "0.6" +quick-xml = { version = "0.31", features = ["serialize"] } +csv = "1.3" +calamine = "0.23" +html2text = "0.11" +chardetng = "0.1" +encoding_rs = "0.8" +infer = "0.15" + +[build-dependencies] +tauri-plugin = { version = "2.3.1", features = ["build"] } diff --git a/src-tauri/plugins/tauri-plugin-rag/build.rs b/src-tauri/plugins/tauri-plugin-rag/build.rs new file mode 100644 index 000000000..30c58872d --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/build.rs @@ -0,0 +1,7 @@ +fn main() { + tauri_plugin::Builder::new(&[ + "parse_document", + ]) + .build(); +} + diff --git a/src-tauri/plugins/tauri-plugin-rag/guest-js/index.ts b/src-tauri/plugins/tauri-plugin-rag/guest-js/index.ts new file mode 100644 index 000000000..9f7965159 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/guest-js/index.ts @@ -0,0 +1,6 @@ +import { invoke } from '@tauri-apps/api/core' + +export async function parseDocument(filePath: string, fileType: string): Promise { + // Send both snake_case and camelCase for compatibility across runtimes/builds + return await invoke('plugin:rag|parse_document', { filePath, fileType }) +} diff --git a/src-tauri/plugins/tauri-plugin-rag/package.json b/src-tauri/plugins/tauri-plugin-rag/package.json new file mode 100644 index 000000000..bac28917d --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/package.json @@ -0,0 +1,33 @@ +{ + "name": "@janhq/tauri-plugin-rag-api", + "version": "0.1.0", + "private": true, + "description": "Guest JS API for Jan RAG plugin", + "type": "module", + "types": "./dist-js/index.d.ts", + "main": "./dist-js/index.cjs", + "module": "./dist-js/index.js", + "exports": { + "types": "./dist-js/index.d.ts", + "import": "./dist-js/index.js", + "require": "./dist-js/index.cjs" + }, + "files": [ + "dist-js", + "README.md" + ], + "scripts": { + "build": "rollup -c", + "prepublishOnly": 
"yarn build", + "pretest": "yarn build" + }, + "dependencies": { + "@tauri-apps/api": ">=2.0.0-beta.6" + }, + "devDependencies": { + "@rollup/plugin-typescript": "^12.0.0", + "rollup": "^4.9.6", + "tslib": "^2.6.2", + "typescript": "^5.3.3" + } +} diff --git a/src-tauri/plugins/tauri-plugin-rag/permissions/autogenerated/commands/parse_document.toml b/src-tauri/plugins/tauri-plugin-rag/permissions/autogenerated/commands/parse_document.toml new file mode 100644 index 000000000..5cb5da40f --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/permissions/autogenerated/commands/parse_document.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-parse-document" +description = "Enables the parse_document command without any pre-configured scope." +commands.allow = ["parse_document"] + +[[permission]] +identifier = "deny-parse-document" +description = "Denies the parse_document command without any pre-configured scope." +commands.deny = ["parse_document"] diff --git a/src-tauri/plugins/tauri-plugin-rag/permissions/autogenerated/reference.md b/src-tauri/plugins/tauri-plugin-rag/permissions/autogenerated/reference.md new file mode 100644 index 000000000..148c91dfa --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/permissions/autogenerated/reference.md @@ -0,0 +1,43 @@ +## Default Permission + +Default permissions for the rag plugin + +#### This default permission set includes the following: + +- `allow-parse-document` + +## Permission Table + + + + + + + + + + + + + + + + + +
IdentifierDescription
+ +`rag:allow-parse-document` + + + +Enables the parse_document command without any pre-configured scope. + +
+ +`rag:deny-parse-document` + + + +Denies the parse_document command without any pre-configured scope. + +
diff --git a/src-tauri/plugins/tauri-plugin-rag/permissions/default.toml b/src-tauri/plugins/tauri-plugin-rag/permissions/default.toml new file mode 100644 index 000000000..3c8dd7537 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/permissions/default.toml @@ -0,0 +1,6 @@ +[default] +description = "Default permissions for the rag plugin" +permissions = [ + "allow-parse-document", +] + diff --git a/src-tauri/plugins/tauri-plugin-rag/permissions/schemas/schema.json b/src-tauri/plugins/tauri-plugin-rag/permissions/schemas/schema.json new file mode 100644 index 000000000..a4b5488ac --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/permissions/schemas/schema.json @@ -0,0 +1,318 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "PermissionFile", + "description": "Permission file that can define a default permission, a set of permissions or a list of inlined permissions.", + "type": "object", + "properties": { + "default": { + "description": "The default permission set for the plugin", + "anyOf": [ + { + "$ref": "#/definitions/DefaultPermission" + }, + { + "type": "null" + } + ] + }, + "set": { + "description": "A list of permissions sets defined", + "type": "array", + "items": { + "$ref": "#/definitions/PermissionSet" + } + }, + "permission": { + "description": "A list of inlined permissions", + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/Permission" + } + } + }, + "definitions": { + "DefaultPermission": { + "description": "The default permission set of the plugin.\n\nWorks similarly to a permission with the \"default\" identifier.", + "type": "object", + "required": [ + "permissions" + ], + "properties": { + "version": { + "description": "The version of the permission.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 1.0 + }, + "description": { + "description": "Human-readable description of what the permission does. Tauri convention is to use `

` headings in markdown content for Tauri documentation generation purposes.", + "type": [ + "string", + "null" + ] + }, + "permissions": { + "description": "All permissions this set contains.", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "PermissionSet": { + "description": "A set of direct permissions grouped together under a new name.", + "type": "object", + "required": [ + "description", + "identifier", + "permissions" + ], + "properties": { + "identifier": { + "description": "A unique identifier for the permission.", + "type": "string" + }, + "description": { + "description": "Human-readable description of what the permission does.", + "type": "string" + }, + "permissions": { + "description": "All permissions this set contains.", + "type": "array", + "items": { + "$ref": "#/definitions/PermissionKind" + } + } + } + }, + "Permission": { + "description": "Descriptions of explicit privileges of commands.\n\nIt can enable commands to be accessible in the frontend of the application.\n\nIf the scope is defined it can be used to fine grain control the access of individual or multiple commands.", + "type": "object", + "required": [ + "identifier" + ], + "properties": { + "version": { + "description": "The version of the permission.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 1.0 + }, + "identifier": { + "description": "A unique identifier for the permission.", + "type": "string" + }, + "description": { + "description": "Human-readable description of what the permission does. Tauri internal convention is to use `

` headings in markdown content for Tauri documentation generation purposes.", + "type": [ + "string", + "null" + ] + }, + "commands": { + "description": "Allowed or denied commands when using this permission.", + "default": { + "allow": [], + "deny": [] + }, + "allOf": [ + { + "$ref": "#/definitions/Commands" + } + ] + }, + "scope": { + "description": "Allowed or denied scoped when using this permission.", + "allOf": [ + { + "$ref": "#/definitions/Scopes" + } + ] + }, + "platforms": { + "description": "Target platforms this permission applies. By default all platforms are affected by this permission.", + "type": [ + "array", + "null" + ], + "items": { + "$ref": "#/definitions/Target" + } + } + } + }, + "Commands": { + "description": "Allowed and denied commands inside a permission.\n\nIf two commands clash inside of `allow` and `deny`, it should be denied by default.", + "type": "object", + "properties": { + "allow": { + "description": "Allowed command.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "deny": { + "description": "Denied command, which takes priority.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "Scopes": { + "description": "An argument for fine grained behavior control of Tauri commands.\n\nIt can be of any serde serializable type and is used to allow or prevent certain actions inside a Tauri command. The configured scope is passed to the command and will be enforced by the command implementation.\n\n## Example\n\n```json { \"allow\": [{ \"path\": \"$HOME/**\" }], \"deny\": [{ \"path\": \"$HOME/secret.txt\" }] } ```", + "type": "object", + "properties": { + "allow": { + "description": "Data that defines what is allowed by the scope.", + "type": [ + "array", + "null" + ], + "items": { + "$ref": "#/definitions/Value" + } + }, + "deny": { + "description": "Data that defines what is denied by the scope. 
This should be prioritized by validation logic.", + "type": [ + "array", + "null" + ], + "items": { + "$ref": "#/definitions/Value" + } + } + } + }, + "Value": { + "description": "All supported ACL values.", + "anyOf": [ + { + "description": "Represents a null JSON value.", + "type": "null" + }, + { + "description": "Represents a [`bool`].", + "type": "boolean" + }, + { + "description": "Represents a valid ACL [`Number`].", + "allOf": [ + { + "$ref": "#/definitions/Number" + } + ] + }, + { + "description": "Represents a [`String`].", + "type": "string" + }, + { + "description": "Represents a list of other [`Value`]s.", + "type": "array", + "items": { + "$ref": "#/definitions/Value" + } + }, + { + "description": "Represents a map of [`String`] keys to [`Value`]s.", + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/Value" + } + } + ] + }, + "Number": { + "description": "A valid ACL number.", + "anyOf": [ + { + "description": "Represents an [`i64`].", + "type": "integer", + "format": "int64" + }, + { + "description": "Represents a [`f64`].", + "type": "number", + "format": "double" + } + ] + }, + "Target": { + "description": "Platform target.", + "oneOf": [ + { + "description": "MacOS.", + "type": "string", + "enum": [ + "macOS" + ] + }, + { + "description": "Windows.", + "type": "string", + "enum": [ + "windows" + ] + }, + { + "description": "Linux.", + "type": "string", + "enum": [ + "linux" + ] + }, + { + "description": "Android.", + "type": "string", + "enum": [ + "android" + ] + }, + { + "description": "iOS.", + "type": "string", + "enum": [ + "iOS" + ] + } + ] + }, + "PermissionKind": { + "type": "string", + "oneOf": [ + { + "description": "Enables the parse_document command without any pre-configured scope.", + "type": "string", + "const": "allow-parse-document", + "markdownDescription": "Enables the parse_document command without any pre-configured scope." 
+ }, + { + "description": "Denies the parse_document command without any pre-configured scope.", + "type": "string", + "const": "deny-parse-document", + "markdownDescription": "Denies the parse_document command without any pre-configured scope." + }, + { + "description": "Default permissions for the rag plugin\n#### This default permission set includes:\n\n- `allow-parse-document`", + "type": "string", + "const": "default", + "markdownDescription": "Default permissions for the rag plugin\n#### This default permission set includes:\n\n- `allow-parse-document`" + } + ] + } + } +} \ No newline at end of file diff --git a/src-tauri/plugins/tauri-plugin-rag/rollup.config.js b/src-tauri/plugins/tauri-plugin-rag/rollup.config.js new file mode 100644 index 000000000..5047bf72d --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/rollup.config.js @@ -0,0 +1,32 @@ +import { readFileSync } from 'node:fs' +import { dirname, join } from 'node:path' +import { cwd } from 'node:process' +import typescript from '@rollup/plugin-typescript' + +const pkg = JSON.parse(readFileSync(join(cwd(), 'package.json'), 'utf8')) + +export default { + input: 'guest-js/index.ts', + output: [ + { + file: pkg.exports.import, + format: 'esm' + }, + { + file: pkg.exports.require, + format: 'cjs' + } + ], + plugins: [ + typescript({ + declaration: true, + declarationDir: dirname(pkg.exports.import) + }) + ], + external: [ + /^@tauri-apps\/api/, + ...Object.keys(pkg.dependencies || {}), + ...Object.keys(pkg.peerDependencies || {}) + ] +} + diff --git a/src-tauri/plugins/tauri-plugin-rag/src/commands.rs b/src-tauri/plugins/tauri-plugin-rag/src/commands.rs new file mode 100644 index 000000000..6f2a0f112 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/src/commands.rs @@ -0,0 +1,12 @@ +use crate::{RagError, parser}; + +#[tauri::command] +pub async fn parse_document( + _app: tauri::AppHandle, + file_path: String, + file_type: String, +) -> Result { + log::info!("Parsing document: {} (type: {})", 
file_path, file_type); + let res = parser::parse_document(&file_path, &file_type); + res +} diff --git a/src-tauri/plugins/tauri-plugin-rag/src/error.rs b/src-tauri/plugins/tauri-plugin-rag/src/error.rs new file mode 100644 index 000000000..fe693130b --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/src/error.rs @@ -0,0 +1,20 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, thiserror::Error, Serialize, Deserialize)] +pub enum RagError { + #[error("Failed to parse document: {0}")] + ParseError(String), + + #[error("Unsupported file type: {0}")] + UnsupportedFileType(String), + + #[error("IO error: {0}")] + IoError(String), +} + +impl From for RagError { + fn from(err: std::io::Error) -> Self { + RagError::IoError(err.to_string()) + } +} + diff --git a/src-tauri/plugins/tauri-plugin-rag/src/lib.rs b/src-tauri/plugins/tauri-plugin-rag/src/lib.rs new file mode 100644 index 000000000..1c66e3388 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/src/lib.rs @@ -0,0 +1,20 @@ +use tauri::{ + plugin::{Builder, TauriPlugin}, + Runtime, +}; + +mod parser; +mod error; +mod commands; + +pub use error::RagError; + +pub fn init() -> TauriPlugin { + Builder::new("rag") + .invoke_handler(tauri::generate_handler![ + commands::parse_document, + ]) + .setup(|_app, _api| Ok(())) + .build() +} + diff --git a/src-tauri/plugins/tauri-plugin-rag/src/parser.rs b/src-tauri/plugins/tauri-plugin-rag/src/parser.rs new file mode 100644 index 000000000..2c4d99dad --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/src/parser.rs @@ -0,0 +1,258 @@ +use crate::RagError; +use std::fs; +use std::io::{Read, Cursor}; +use zip::read::ZipArchive; +use quick_xml::events::Event; +use quick_xml::Reader; +use csv as csv_crate; +use calamine::{Reader as _, open_workbook_auto, DataType}; +use html2text; +use chardetng::EncodingDetector; +use infer; +use std::borrow::Cow; + +pub fn parse_pdf(file_path: &str) -> Result { + let bytes = fs::read(file_path)?; + 
pdf_extract::extract_text_from_mem(&bytes) + .map_err(|e| RagError::ParseError(format!("PDF parse error: {}", e))) +} + +pub fn parse_text(file_path: &str) -> Result { + read_text_auto(file_path) +} + +pub fn parse_document(file_path: &str, file_type: &str) -> Result { + match file_type.to_lowercase().as_str() { + "pdf" | "application/pdf" => parse_pdf(file_path), + "txt" | "text/plain" | "md" | "text/markdown" => parse_text(file_path), + "csv" | "text/csv" => parse_csv(file_path), + // Excel family via calamine + "xlsx" + | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + | "xls" + | "application/vnd.ms-excel" + | "ods" + | "application/vnd.oasis.opendocument.spreadsheet" => parse_spreadsheet(file_path), + // PowerPoint + "pptx" + | "application/vnd.openxmlformats-officedocument.presentationml.presentation" => parse_pptx(file_path), + // HTML + "html" | "htm" | "text/html" => parse_html(file_path), + "docx" + | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => { + parse_docx(file_path) + } + other => { + // Try MIME sniffing when extension or MIME is unknown + if let Ok(Some(k)) = infer::get_from_path(file_path) { + let mime = k.mime_type(); + return parse_document(file_path, mime); + } + Err(RagError::UnsupportedFileType(other.to_string())) + } + } +} + +fn parse_docx(file_path: &str) -> Result { + let file = std::fs::File::open(file_path)?; + let mut zip = ZipArchive::new(file).map_err(|e| RagError::ParseError(e.to_string()))?; + + // Standard DOCX stores document text at word/document.xml + let mut doc_xml = match zip.by_name("word/document.xml") { + Ok(f) => f, + Err(_) => return Err(RagError::ParseError("document.xml not found".into())), + }; + let mut xml_content = String::new(); + doc_xml + .read_to_string(&mut xml_content) + .map_err(|e| RagError::ParseError(e.to_string()))?; + + // Parse XML and extract text from w:t nodes; add newlines on w:p boundaries + let mut reader = Reader::from_str(&xml_content); + 
reader.trim_text(true); + let mut buf = Vec::new(); + let mut result = String::new(); + let mut in_text = false; + + loop { + match reader.read_event_into(&mut buf) { + Ok(Event::Start(e)) => { + let name: String = reader + .decoder() + .decode(e.name().as_ref()) + .unwrap_or(Cow::Borrowed("")) + .into_owned(); + if name.ends_with(":t") || name == "w:t" || name == "t" { + in_text = true; + } + } + Ok(Event::End(e)) => { + let name: String = reader + .decoder() + .decode(e.name().as_ref()) + .unwrap_or(Cow::Borrowed("")) + .into_owned(); + if name.ends_with(":t") || name == "w:t" || name == "t" { + in_text = false; + result.push(' '); + } + if name.ends_with(":p") || name == "w:p" || name == "p" { + // Paragraph end – add newline + result.push_str("\n\n"); + } + } + Ok(Event::Text(t)) => { + if in_text { + let text = t.unescape().unwrap_or_default(); + result.push_str(&text); + } + } + Ok(Event::Eof) => break, + Err(e) => return Err(RagError::ParseError(e.to_string())), + _ => {} + } + } + + // Normalize whitespace + let normalized = result + .lines() + .map(|l| l.trim()) + .filter(|l| !l.is_empty()) + .collect::>() + .join("\n"); + Ok(normalized) +} + +fn parse_csv(file_path: &str) -> Result { + let mut rdr = csv_crate::ReaderBuilder::new() + .has_headers(false) + .flexible(true) + .from_path(file_path) + .map_err(|e| RagError::ParseError(e.to_string()))?; + let mut out = String::new(); + for rec in rdr.records() { + let rec = rec.map_err(|e| RagError::ParseError(e.to_string()))?; + out.push_str(&rec.iter().collect::>().join(", ")); + out.push('\n'); + } + Ok(out) +} + +fn parse_spreadsheet(file_path: &str) -> Result { + let mut workbook = open_workbook_auto(file_path) + .map_err(|e| RagError::ParseError(e.to_string()))?; + let mut out = String::new(); + for sheet_name in workbook.sheet_names().to_owned() { + if let Ok(range) = workbook.worksheet_range(&sheet_name) { + out.push_str(&format!("# Sheet: {}\n", sheet_name)); + for row in range.rows() { + let cells = 
row + .iter() + .map(|c| match c { + DataType::Empty => "".to_string(), + DataType::String(s) => s.to_string(), + DataType::Float(f) => format!("{}", f), + DataType::Int(i) => i.to_string(), + DataType::Bool(b) => b.to_string(), + DataType::DateTime(f) => format!("{}", f), + other => other.to_string(), + }) + .collect::>() + .join("\t"); + out.push_str(&cells); + out.push('\n'); + } + out.push_str("\n"); + } + } + Ok(out) +} + +fn parse_pptx(file_path: &str) -> Result { + let file = std::fs::File::open(file_path)?; + let mut zip = ZipArchive::new(file).map_err(|e| RagError::ParseError(e.to_string()))?; + + // Collect slide files: ppt/slides/slide*.xml + let mut slides = Vec::new(); + for i in 0..zip.len() { + let name = zip.by_index(i).map(|f| f.name().to_string()).unwrap_or_default(); + if name.starts_with("ppt/slides/") && name.ends_with(".xml") { + slides.push(name); + } + } + slides.sort(); + + let mut output = String::new(); + for slide_name in slides { + let mut file = zip.by_name(&slide_name).map_err(|e| RagError::ParseError(e.to_string()))?; + let mut xml = String::new(); + file.read_to_string(&mut xml).map_err(|e| RagError::ParseError(e.to_string()))?; + output.push_str(&extract_pptx_text(&xml)); + output.push_str("\n\n"); + } + Ok(output) +} + +fn extract_pptx_text(xml: &str) -> String { + let mut reader = Reader::from_str(xml); + reader.trim_text(true); + let mut buf = Vec::new(); + let mut result = String::new(); + let mut in_text = false; + loop { + match reader.read_event_into(&mut buf) { + Ok(Event::Start(e)) => { + let name: String = reader + .decoder() + .decode(e.name().as_ref()) + .unwrap_or(Cow::Borrowed("")) + .into_owned(); + if name.ends_with(":t") || name == "a:t" || name == "t" { + in_text = true; + } + } + Ok(Event::End(e)) => { + let name: String = reader + .decoder() + .decode(e.name().as_ref()) + .unwrap_or(Cow::Borrowed("")) + .into_owned(); + if name.ends_with(":t") || name == "a:t" || name == "t" { + in_text = false; + result.push(' 
'); + } + } + Ok(Event::Text(t)) => { + if in_text { + let text = t.unescape().unwrap_or_default(); + result.push_str(&text); + } + } + Ok(Event::Eof) => break, + Err(_) => break, + _ => {} + } + } + result +} + +fn parse_html(file_path: &str) -> Result { + let html = read_text_auto(file_path)?; + // 80-column wrap default + Ok(html2text::from_read(Cursor::new(html), 80)) +} + +fn read_text_auto(file_path: &str) -> Result { + let bytes = fs::read(file_path)?; + // Detect encoding + let mut detector = EncodingDetector::new(); + detector.feed(&bytes, true); + let enc = detector.guess(None, true); + let (decoded, _, had_errors) = enc.decode(&bytes); + if had_errors { + // fallback to UTF-8 lossy + Ok(String::from_utf8_lossy(&bytes).to_string()) + } else { + Ok(decoded.to_string()) + } +} diff --git a/src-tauri/plugins/tauri-plugin-rag/tsconfig.json b/src-tauri/plugins/tauri-plugin-rag/tsconfig.json new file mode 100644 index 000000000..60bc6a8eb --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-rag/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "es2021", + "module": "esnext", + "moduleResolution": "bundler", + "skipLibCheck": true, + "strict": true, + "noUnusedLocals": true, + "noImplicitAny": true, + "noEmit": true + }, + "include": ["guest-js/*.ts"], + "exclude": ["dist-js", "node_modules"] +} + diff --git a/src-tauri/plugins/tauri-plugin-vector-db/.gitignore b/src-tauri/plugins/tauri-plugin-vector-db/.gitignore new file mode 100644 index 000000000..50d8e32e8 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/.gitignore @@ -0,0 +1,17 @@ +/.vs +.DS_Store +.Thumbs.db +*.sublime* +.idea/ +debug.log +package-lock.json +.vscode/settings.json +yarn.lock + +/.tauri +/target +Cargo.lock +node_modules/ + +dist-js +dist diff --git a/src-tauri/plugins/tauri-plugin-vector-db/Cargo.toml b/src-tauri/plugins/tauri-plugin-vector-db/Cargo.toml new file mode 100644 index 000000000..eb377c157 --- /dev/null +++ 
b/src-tauri/plugins/tauri-plugin-vector-db/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "tauri-plugin-vector-db" +version = "0.1.0" +authors = ["Jan "] +description = "Tauri plugin for vector storage and similarity search" +license = "MIT" +repository = "https://github.com/menloresearch/jan" +edition = "2021" +rust-version = "1.77.2" +exclude = ["/examples", "/dist-js", "/guest-js", "/node_modules"] +links = "tauri-plugin-vector-db" + +[dependencies] +tauri = { version = "2.8.5", default-features = false } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +thiserror = "2.0" +tokio = { version = "1", features = ["full"] } +log = "0.4" +rusqlite = { version = "0.32", features = ["bundled", "load_extension"] } +uuid = { version = "1.7", features = ["v4", "serde"] } +dirs = "6.0.0" + +[build-dependencies] +tauri-plugin = { version = "2.3.1", features = ["build"] } diff --git a/src-tauri/plugins/tauri-plugin-vector-db/build.rs b/src-tauri/plugins/tauri-plugin-vector-db/build.rs new file mode 100644 index 000000000..71c8c3a52 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/build.rs @@ -0,0 +1,12 @@ +fn main() { + tauri_plugin::Builder::new(&[ + "create_collection", + "insert_chunks", + "search_collection", + "delete_chunks", + "delete_collection", + "chunk_text", + "get_status", + ]) + .build(); +} diff --git a/src-tauri/plugins/tauri-plugin-vector-db/guest-js/index.ts b/src-tauri/plugins/tauri-plugin-vector-db/guest-js/index.ts new file mode 100644 index 000000000..4b65786a5 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/guest-js/index.ts @@ -0,0 +1,106 @@ +import { invoke } from '@tauri-apps/api/core' + +export type SearchMode = 'auto' | 'ann' | 'linear' + +export interface ChunkInput { + id?: string + text: string + embedding: number[] + metadata?: Record +} + +export interface SearchResult { + id: string + text: string + score?: number + file_id: string + chunk_file_order: number +} + +export interface Status { + 
ann_available: boolean +} + +export interface AttachmentFileInfo { + id: string + name?: string + path?: string + type?: string + size?: number + chunk_count: number +} + +// Events +// Events are not exported in guest-js to keep API minimal + +export async function getStatus(): Promise { + return await invoke('plugin:vector-db|get_status') +} + +export async function createCollection(name: string, dimension: number): Promise { + // Use camelCase param name `dimension` to match Tauri v2 argument keys + return await invoke('plugin:vector-db|create_collection', { name, dimension }) +} + +export async function insertChunks( + collection: string, + chunks: ChunkInput[] +): Promise { + return await invoke('plugin:vector-db|insert_chunks', { collection, chunks }) +} + +export async function searchCollection( + collection: string, + queryEmbedding: number[], + limit: number, + threshold: number, + mode?: SearchMode, + fileIds?: string[] +): Promise { + return await invoke('plugin:vector-db|search_collection', { + collection, + queryEmbedding, + limit, + threshold, + mode, + fileIds, + }) +} + +export async function deleteChunks(collection: string, ids: string[]): Promise { + return await invoke('plugin:vector-db|delete_chunks', { collection, ids }) +} + +export async function deleteCollection(collection: string): Promise { + return await invoke('plugin:vector-db|delete_collection', { collection }) +} + +export async function chunkText( + text: string, + chunkSize: number, + chunkOverlap: number +): Promise { + // Use snake_case to match Rust command parameter names + return await invoke('plugin:vector-db|chunk_text', { text, chunkSize, chunkOverlap }) +} + +export async function listAttachments( + collection: string, + limit?: number +): Promise { + return await invoke('plugin:vector-db|list_attachments', { collection, limit }) +} + +export async function getChunks( + collection: string, + fileId: string, + startOrder: number, + endOrder: number +): Promise { + return 
await invoke('plugin:vector-db|get_chunks', { + collection, + fileId, + startOrder, + endOrder, + }) +} diff --git a/src-tauri/plugins/tauri-plugin-vector-db/package.json b/src-tauri/plugins/tauri-plugin-vector-db/package.json new file mode 100644 index 000000000..d2db2bbbe --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/package.json @@ -0,0 +1,33 @@ +{ + "name": "@janhq/tauri-plugin-vector-db-api", + "version": "0.1.0", + "private": true, + "description": "Guest JS API for Jan vector DB plugin", + "type": "module", + "types": "./dist-js/index.d.ts", + "main": "./dist-js/index.cjs", + "module": "./dist-js/index.js", + "exports": { + "types": "./dist-js/index.d.ts", + "import": "./dist-js/index.js", + "require": "./dist-js/index.cjs" + }, + "files": [ + "dist-js", + "README.md" + ], + "scripts": { + "build": "rollup -c", + "prepublishOnly": "yarn build", + "pretest": "yarn build" + }, + "dependencies": { + "@tauri-apps/api": ">=2.0.0-beta.6" + }, + "devDependencies": { + "@rollup/plugin-typescript": "^12.0.0", + "rollup": "^4.9.6", + "tslib": "^2.6.2", + "typescript": "^5.3.3" + } +} diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/chunk_text.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/chunk_text.toml new file mode 100644 index 000000000..341a0a194 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/chunk_text.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-chunk-text" +description = "Enables the chunk_text command without any pre-configured scope." +commands.allow = ["chunk_text"] + +[[permission]] +identifier = "deny-chunk-text" +description = "Denies the chunk_text command without any pre-configured scope." 
+commands.deny = ["chunk_text"] diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/create_collection.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/create_collection.toml new file mode 100644 index 000000000..402644497 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/create_collection.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-create-collection" +description = "Enables the create_collection command without any pre-configured scope." +commands.allow = ["create_collection"] + +[[permission]] +identifier = "deny-create-collection" +description = "Denies the create_collection command without any pre-configured scope." +commands.deny = ["create_collection"] diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/delete_chunks.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/delete_chunks.toml new file mode 100644 index 000000000..ecf2055a8 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/delete_chunks.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-delete-chunks" +description = "Enables the delete_chunks command without any pre-configured scope." +commands.allow = ["delete_chunks"] + +[[permission]] +identifier = "deny-delete-chunks" +description = "Denies the delete_chunks command without any pre-configured scope." 
+commands.deny = ["delete_chunks"] diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/delete_collection.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/delete_collection.toml new file mode 100644 index 000000000..5a24329cb --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/delete_collection.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-delete-collection" +description = "Enables the delete_collection command without any pre-configured scope." +commands.allow = ["delete_collection"] + +[[permission]] +identifier = "deny-delete-collection" +description = "Denies the delete_collection command without any pre-configured scope." +commands.deny = ["delete_collection"] diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/get_chunks.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/get_chunks.toml new file mode 100644 index 000000000..6dc03e311 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/get_chunks.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-get-chunks" +description = "Enables the get_chunks command without any pre-configured scope." +commands.allow = ["get_chunks"] + +[[permission]] +identifier = "deny-get-chunks" +description = "Denies the get_chunks command without any pre-configured scope." 
+commands.deny = ["get_chunks"] diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/get_status.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/get_status.toml new file mode 100644 index 000000000..ff573a743 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/get_status.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-get-status" +description = "Enables the get_status command without any pre-configured scope." +commands.allow = ["get_status"] + +[[permission]] +identifier = "deny-get-status" +description = "Denies the get_status command without any pre-configured scope." +commands.deny = ["get_status"] diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/insert_chunks.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/insert_chunks.toml new file mode 100644 index 000000000..c83e268d2 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/insert_chunks.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-insert-chunks" +description = "Enables the insert_chunks command without any pre-configured scope." +commands.allow = ["insert_chunks"] + +[[permission]] +identifier = "deny-insert-chunks" +description = "Denies the insert_chunks command without any pre-configured scope." 
+commands.deny = ["insert_chunks"] diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/list_attachments.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/list_attachments.toml new file mode 100644 index 000000000..0daac76c7 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/list_attachments.toml @@ -0,0 +1,14 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-list-attachments" +description = "Enables the list_attachments command without any pre-configured scope." +commands.allow = ["list_attachments"] + +[[permission]] +identifier = "deny-list-attachments" +description = "Denies the list_attachments command without any pre-configured scope." +commands.deny = ["list_attachments"] + diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/search_collection.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/search_collection.toml new file mode 100644 index 000000000..e408b935c --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/commands/search_collection.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-search-collection" +description = "Enables the search_collection command without any pre-configured scope." +commands.allow = ["search_collection"] + +[[permission]] +identifier = "deny-search-collection" +description = "Denies the search_collection command without any pre-configured scope." 
+commands.deny = ["search_collection"] diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/reference.md b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/reference.md new file mode 100644 index 000000000..78663f5aa --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/autogenerated/reference.md @@ -0,0 +1,259 @@ +## Default Permission + +Default permissions for the vector-db plugin + +#### This default permission set includes the following: + +- `allow-get-status` +- `allow-create-collection` +- `allow-insert-chunks` +- `allow-search-collection` +- `allow-delete-chunks` +- `allow-delete-collection` +- `allow-chunk-text` +- `allow-list-attachments` +- `allow-get-chunks` + +## Permission Table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
IdentifierDescription
+ +`vector-db:allow-chunk-text` + + + +Enables the chunk_text command without any pre-configured scope. + +
+ +`vector-db:deny-chunk-text` + + + +Denies the chunk_text command without any pre-configured scope. + +
+ +`vector-db:allow-create-collection` + + + +Enables the create_collection command without any pre-configured scope. + +
+ +`vector-db:deny-create-collection` + + + +Denies the create_collection command without any pre-configured scope. + +
+ +`vector-db:allow-delete-chunks` + + + +Enables the delete_chunks command without any pre-configured scope. + +
+ +`vector-db:deny-delete-chunks` + + + +Denies the delete_chunks command without any pre-configured scope. + +
+ +`vector-db:allow-delete-collection` + + + +Enables the delete_collection command without any pre-configured scope. + +
+ +`vector-db:deny-delete-collection` + + + +Denies the delete_collection command without any pre-configured scope. + +
+ +`vector-db:allow-get-chunks` + + + +Enables the get_chunks command without any pre-configured scope. + +
+ +`vector-db:deny-get-chunks` + + + +Denies the get_chunks command without any pre-configured scope. + +
+ +`vector-db:allow-get-status` + + + +Enables the get_status command without any pre-configured scope. + +
+ +`vector-db:deny-get-status` + + + +Denies the get_status command without any pre-configured scope. + +
+ +`vector-db:allow-insert-chunks` + + + +Enables the insert_chunks command without any pre-configured scope. + +
+ +`vector-db:deny-insert-chunks` + + + +Denies the insert_chunks command without any pre-configured scope. + +
+ +`vector-db:allow-list-attachments` + + + +Enables the list_attachments command without any pre-configured scope. + +
+ +`vector-db:deny-list-attachments` + + + +Denies the list_attachments command without any pre-configured scope. + +
+ +`vector-db:allow-search-collection` + + + +Enables the search_collection command without any pre-configured scope. + +
+ +`vector-db:deny-search-collection` + + + +Denies the search_collection command without any pre-configured scope. + +
diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/default.toml b/src-tauri/plugins/tauri-plugin-vector-db/permissions/default.toml new file mode 100644 index 000000000..1e4233865 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/default.toml @@ -0,0 +1,13 @@ +[default] +description = "Default permissions for the vector-db plugin" +permissions = [ + "allow-get-status", + "allow-create-collection", + "allow-insert-chunks", + "allow-search-collection", + "allow-delete-chunks", + "allow-delete-collection", + "allow-chunk-text", + "allow-list-attachments", + "allow-get-chunks", +] diff --git a/src-tauri/plugins/tauri-plugin-vector-db/permissions/schemas/schema.json b/src-tauri/plugins/tauri-plugin-vector-db/permissions/schemas/schema.json new file mode 100644 index 000000000..1192c07ee --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/permissions/schemas/schema.json @@ -0,0 +1,414 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "PermissionFile", + "description": "Permission file that can define a default permission, a set of permissions or a list of inlined permissions.", + "type": "object", + "properties": { + "default": { + "description": "The default permission set for the plugin", + "anyOf": [ + { + "$ref": "#/definitions/DefaultPermission" + }, + { + "type": "null" + } + ] + }, + "set": { + "description": "A list of permissions sets defined", + "type": "array", + "items": { + "$ref": "#/definitions/PermissionSet" + } + }, + "permission": { + "description": "A list of inlined permissions", + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/Permission" + } + } + }, + "definitions": { + "DefaultPermission": { + "description": "The default permission set of the plugin.\n\nWorks similarly to a permission with the \"default\" identifier.", + "type": "object", + "required": [ + "permissions" + ], + "properties": { + "version": { + "description": "The version of the 
permission.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 1.0 + }, + "description": { + "description": "Human-readable description of what the permission does. Tauri convention is to use `<h4>` headings in markdown content for Tauri documentation generation purposes.", + "type": [ + "string", + "null" + ] + }, + "permissions": { + "description": "All permissions this set contains.", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "PermissionSet": { + "description": "A set of direct permissions grouped together under a new name.", + "type": "object", + "required": [ + "description", + "identifier", + "permissions" + ], + "properties": { + "identifier": { + "description": "A unique identifier for the permission.", + "type": "string" + }, + "description": { + "description": "Human-readable description of what the permission does.", + "type": "string" + }, + "permissions": { + "description": "All permissions this set contains.", + "type": "array", + "items": { + "$ref": "#/definitions/PermissionKind" + } + } + } + }, + "Permission": { + "description": "Descriptions of explicit privileges of commands.\n\nIt can enable commands to be accessible in the frontend of the application.\n\nIf the scope is defined it can be used to fine grain control the access of individual or multiple commands.", + "type": "object", + "required": [ + "identifier" + ], + "properties": { + "version": { + "description": "The version of the permission.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 1.0 + }, + "identifier": { + "description": "A unique identifier for the permission.", + "type": "string" + }, + "description": { + "description": "Human-readable description of what the permission does. Tauri internal convention is to use `<h4>` headings in markdown content for Tauri documentation generation purposes.", + "type": [ + "string", + "null" + ] + }, + "commands": { + "description": "Allowed or denied commands when using this permission.", + "default": { + "allow": [], + "deny": [] + }, + "allOf": [ + { + "$ref": "#/definitions/Commands" + } + ] + }, + "scope": { + "description": "Allowed or denied scoped when using this permission.", + "allOf": [ + { + "$ref": "#/definitions/Scopes" + } + ] + }, + "platforms": { + "description": "Target platforms this permission applies. By default all platforms are affected by this permission.", + "type": [ + "array", + "null" + ], + "items": { + "$ref": "#/definitions/Target" + } + } + } + }, + "Commands": { + "description": "Allowed and denied commands inside a permission.\n\nIf two commands clash inside of `allow` and `deny`, it should be denied by default.", + "type": "object", + "properties": { + "allow": { + "description": "Allowed command.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "deny": { + "description": "Denied command, which takes priority.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "Scopes": { + "description": "An argument for fine grained behavior control of Tauri commands.\n\nIt can be of any serde serializable type and is used to allow or prevent certain actions inside a Tauri command. The configured scope is passed to the command and will be enforced by the command implementation.\n\n## Example\n\n```json { \"allow\": [{ \"path\": \"$HOME/**\" }], \"deny\": [{ \"path\": \"$HOME/secret.txt\" }] } ```", + "type": "object", + "properties": { + "allow": { + "description": "Data that defines what is allowed by the scope.", + "type": [ + "array", + "null" + ], + "items": { + "$ref": "#/definitions/Value" + } + }, + "deny": { + "description": "Data that defines what is denied by the scope.
This should be prioritized by validation logic.", + "type": [ + "array", + "null" + ], + "items": { + "$ref": "#/definitions/Value" + } + } + } + }, + "Value": { + "description": "All supported ACL values.", + "anyOf": [ + { + "description": "Represents a null JSON value.", + "type": "null" + }, + { + "description": "Represents a [`bool`].", + "type": "boolean" + }, + { + "description": "Represents a valid ACL [`Number`].", + "allOf": [ + { + "$ref": "#/definitions/Number" + } + ] + }, + { + "description": "Represents a [`String`].", + "type": "string" + }, + { + "description": "Represents a list of other [`Value`]s.", + "type": "array", + "items": { + "$ref": "#/definitions/Value" + } + }, + { + "description": "Represents a map of [`String`] keys to [`Value`]s.", + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/Value" + } + } + ] + }, + "Number": { + "description": "A valid ACL number.", + "anyOf": [ + { + "description": "Represents an [`i64`].", + "type": "integer", + "format": "int64" + }, + { + "description": "Represents a [`f64`].", + "type": "number", + "format": "double" + } + ] + }, + "Target": { + "description": "Platform target.", + "oneOf": [ + { + "description": "MacOS.", + "type": "string", + "enum": [ + "macOS" + ] + }, + { + "description": "Windows.", + "type": "string", + "enum": [ + "windows" + ] + }, + { + "description": "Linux.", + "type": "string", + "enum": [ + "linux" + ] + }, + { + "description": "Android.", + "type": "string", + "enum": [ + "android" + ] + }, + { + "description": "iOS.", + "type": "string", + "enum": [ + "iOS" + ] + } + ] + }, + "PermissionKind": { + "type": "string", + "oneOf": [ + { + "description": "Enables the chunk_text command without any pre-configured scope.", + "type": "string", + "const": "allow-chunk-text", + "markdownDescription": "Enables the chunk_text command without any pre-configured scope." 
+ }, + { + "description": "Denies the chunk_text command without any pre-configured scope.", + "type": "string", + "const": "deny-chunk-text", + "markdownDescription": "Denies the chunk_text command without any pre-configured scope." + }, + { + "description": "Enables the create_collection command without any pre-configured scope.", + "type": "string", + "const": "allow-create-collection", + "markdownDescription": "Enables the create_collection command without any pre-configured scope." + }, + { + "description": "Denies the create_collection command without any pre-configured scope.", + "type": "string", + "const": "deny-create-collection", + "markdownDescription": "Denies the create_collection command without any pre-configured scope." + }, + { + "description": "Enables the delete_chunks command without any pre-configured scope.", + "type": "string", + "const": "allow-delete-chunks", + "markdownDescription": "Enables the delete_chunks command without any pre-configured scope." + }, + { + "description": "Denies the delete_chunks command without any pre-configured scope.", + "type": "string", + "const": "deny-delete-chunks", + "markdownDescription": "Denies the delete_chunks command without any pre-configured scope." + }, + { + "description": "Enables the delete_collection command without any pre-configured scope.", + "type": "string", + "const": "allow-delete-collection", + "markdownDescription": "Enables the delete_collection command without any pre-configured scope." + }, + { + "description": "Denies the delete_collection command without any pre-configured scope.", + "type": "string", + "const": "deny-delete-collection", + "markdownDescription": "Denies the delete_collection command without any pre-configured scope." + }, + { + "description": "Enables the get_chunks command without any pre-configured scope.", + "type": "string", + "const": "allow-get-chunks", + "markdownDescription": "Enables the get_chunks command without any pre-configured scope." 
+ }, + { + "description": "Denies the get_chunks command without any pre-configured scope.", + "type": "string", + "const": "deny-get-chunks", + "markdownDescription": "Denies the get_chunks command without any pre-configured scope." + }, + { + "description": "Enables the get_status command without any pre-configured scope.", + "type": "string", + "const": "allow-get-status", + "markdownDescription": "Enables the get_status command without any pre-configured scope." + }, + { + "description": "Denies the get_status command without any pre-configured scope.", + "type": "string", + "const": "deny-get-status", + "markdownDescription": "Denies the get_status command without any pre-configured scope." + }, + { + "description": "Enables the insert_chunks command without any pre-configured scope.", + "type": "string", + "const": "allow-insert-chunks", + "markdownDescription": "Enables the insert_chunks command without any pre-configured scope." + }, + { + "description": "Denies the insert_chunks command without any pre-configured scope.", + "type": "string", + "const": "deny-insert-chunks", + "markdownDescription": "Denies the insert_chunks command without any pre-configured scope." + }, + { + "description": "Enables the list_attachments command without any pre-configured scope.", + "type": "string", + "const": "allow-list-attachments", + "markdownDescription": "Enables the list_attachments command without any pre-configured scope." + }, + { + "description": "Denies the list_attachments command without any pre-configured scope.", + "type": "string", + "const": "deny-list-attachments", + "markdownDescription": "Denies the list_attachments command without any pre-configured scope." + }, + { + "description": "Enables the search_collection command without any pre-configured scope.", + "type": "string", + "const": "allow-search-collection", + "markdownDescription": "Enables the search_collection command without any pre-configured scope." 
+ }, + { + "description": "Denies the search_collection command without any pre-configured scope.", + "type": "string", + "const": "deny-search-collection", + "markdownDescription": "Denies the search_collection command without any pre-configured scope." + }, + { + "description": "Default permissions for the vector-db plugin\n#### This default permission set includes:\n\n- `allow-get-status`\n- `allow-create-collection`\n- `allow-insert-chunks`\n- `allow-search-collection`\n- `allow-delete-chunks`\n- `allow-delete-collection`\n- `allow-chunk-text`\n- `allow-list-attachments`\n- `allow-get-chunks`", + "type": "string", + "const": "default", + "markdownDescription": "Default permissions for the vector-db plugin\n#### This default permission set includes:\n\n- `allow-get-status`\n- `allow-create-collection`\n- `allow-insert-chunks`\n- `allow-search-collection`\n- `allow-delete-chunks`\n- `allow-delete-collection`\n- `allow-chunk-text`\n- `allow-list-attachments`\n- `allow-get-chunks`" + } + ] + } + } +} \ No newline at end of file diff --git a/src-tauri/plugins/tauri-plugin-vector-db/rollup.config.js b/src-tauri/plugins/tauri-plugin-vector-db/rollup.config.js new file mode 100644 index 000000000..5047bf72d --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/rollup.config.js @@ -0,0 +1,32 @@ +import { readFileSync } from 'node:fs' +import { dirname, join } from 'node:path' +import { cwd } from 'node:process' +import typescript from '@rollup/plugin-typescript' + +const pkg = JSON.parse(readFileSync(join(cwd(), 'package.json'), 'utf8')) + +export default { + input: 'guest-js/index.ts', + output: [ + { + file: pkg.exports.import, + format: 'esm' + }, + { + file: pkg.exports.require, + format: 'cjs' + } + ], + plugins: [ + typescript({ + declaration: true, + declarationDir: dirname(pkg.exports.import) + }) + ], + external: [ + /^@tauri-apps\/api/, + ...Object.keys(pkg.dependencies || {}), + ...Object.keys(pkg.peerDependencies || {}) + ] +} + diff --git 
a/src-tauri/plugins/tauri-plugin-vector-db/src/commands.rs b/src-tauri/plugins/tauri-plugin-vector-db/src/commands.rs new file mode 100644 index 000000000..ac697c535 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/src/commands.rs @@ -0,0 +1,167 @@ +use crate::{VectorDBError, VectorDBState}; +use crate::db::{ + self, AttachmentFileInfo, ChunkInput, SearchResult, +}; +use serde::{Deserialize, Serialize}; +use tauri::State; + +#[derive(Debug, Serialize, Deserialize)] +pub struct Status { + pub ann_available: bool, +} + +// ============================================================================ +// Tauri Command Handlers +// ============================================================================ + +#[tauri::command] +pub async fn get_status(state: State<'_, VectorDBState>) -> Result<Status, VectorDBError> { + println!("[VectorDB] Checking ANN availability..."); + let temp = db::collection_path(&state.base_dir, "__status__"); + let conn = db::open_or_init_conn(&temp)?; + + // Verbose version for startup diagnostics + let ann = { + if conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS temp.temp_vec USING vec0(embedding float[1])", []).is_ok() { + let _ = conn.execute("DROP TABLE IF EXISTS temp.temp_vec", []); + println!("[VectorDB] ✓ sqlite-vec already loaded"); + true + } else { + unsafe { let _ = conn.load_extension_enable(); } + let paths = db::possible_sqlite_vec_paths(); + println!("[VectorDB] Trying {} bundled paths...", paths.len()); + let mut found = false; + for p in paths { + println!("[VectorDB] Trying: {}", p); + unsafe { + if let Ok(_) = conn.load_extension(&p, Some("sqlite3_vec_init")) { + if conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS temp.temp_vec USING vec0(embedding float[1])", []).is_ok() { + let _ = conn.execute("DROP TABLE IF EXISTS temp.temp_vec", []); + println!("[VectorDB] ✓ sqlite-vec loaded from: {}", p); + found = true; + break; + } + } + } + } + if !found { + println!("[VectorDB] ✗ Failed to load sqlite-vec from all paths"); + } + found + }
+ }; + + println!("[VectorDB] ANN status: {}", if ann { "AVAILABLE ✓" } else { "NOT AVAILABLE ✗" }); + Ok(Status { ann_available: ann }) +} + +#[tauri::command] +pub async fn create_collection( + _app: tauri::AppHandle, + state: State<'_, VectorDBState>, + name: String, + dimension: usize, +) -> Result<(), VectorDBError> { + let path = db::collection_path(&state.base_dir, &name); + let conn = db::open_or_init_conn(&path)?; + + let has_ann = db::create_schema(&conn, dimension)?; + if has_ann { + println!("[VectorDB] ✓ Collection '{}' created with ANN support", name); + } else { + println!("[VectorDB] ⚠ Collection '{}' created WITHOUT ANN support (will use linear search)", name); + } + Ok(()) +} + +#[tauri::command] +pub async fn insert_chunks( + _app: tauri::AppHandle, + state: State<'_, VectorDBState>, + collection: String, + chunks: Vec<ChunkInput>, +) -> Result<(), VectorDBError> { + let path = db::collection_path(&state.base_dir, &collection); + let conn = db::open_or_init_conn(&path)?; + let vec_loaded = db::try_load_sqlite_vec(&conn); + db::insert_chunks(&conn, chunks, vec_loaded)?; + Ok(()) +} + +#[tauri::command] +pub async fn search_collection( + _app: tauri::AppHandle, + state: State<'_, VectorDBState>, + collection: String, + query_embedding: Vec<f32>, + limit: usize, + threshold: f32, + mode: Option<String>, + file_ids: Option<Vec<String>>, +) -> Result<Vec<SearchResult>, VectorDBError> { + let path = db::collection_path(&state.base_dir, &collection); + let conn = db::open_or_init_conn(&path)?; + let vec_loaded = db::try_load_sqlite_vec(&conn); + db::search_collection(&conn, &query_embedding, limit, threshold, mode, vec_loaded, file_ids) +} + +#[tauri::command] +pub async fn list_attachments( + _app: tauri::AppHandle, + state: State<'_, VectorDBState>, + collection: String, + limit: Option<usize>, +) -> Result<Vec<AttachmentFileInfo>, VectorDBError> { + let path = db::collection_path(&state.base_dir, &collection); + let conn = db::open_or_init_conn(&path)?; + db::list_attachments(&conn, limit) +} + +#[tauri::command] +pub async fn
delete_chunks( + _app: tauri::AppHandle, + state: State<'_, VectorDBState>, + collection: String, + ids: Vec<String>, +) -> Result<(), VectorDBError> { + let path = db::collection_path(&state.base_dir, &collection); + let conn = db::open_or_init_conn(&path)?; + db::delete_chunks(&conn, ids) +} + +#[tauri::command] +pub async fn delete_collection( + _app: tauri::AppHandle, + state: State<'_, VectorDBState>, + collection: String, +) -> Result<(), VectorDBError> { + let path = db::collection_path(&state.base_dir, &collection); + if path.exists() { + std::fs::remove_file(path).ok(); + } + Ok(()) +} + +#[tauri::command] +pub async fn chunk_text( + _app: tauri::AppHandle, + text: String, + chunk_size: usize, + chunk_overlap: usize, +) -> Result<Vec<String>, VectorDBError> { + Ok(db::chunk_text(text, chunk_size, chunk_overlap)) +} + +#[tauri::command] +pub async fn get_chunks( + _app: tauri::AppHandle, + state: State<'_, VectorDBState>, + collection: String, + file_id: String, + start_order: i64, + end_order: i64, +) -> Result<Vec<SearchResult>, VectorDBError> { + let path = db::collection_path(&state.base_dir, &collection); + let conn = db::open_or_init_conn(&path)?; + db::get_chunks(&conn, file_id, start_order, end_order) +} diff --git a/src-tauri/plugins/tauri-plugin-vector-db/src/db.rs b/src-tauri/plugins/tauri-plugin-vector-db/src/db.rs new file mode 100644 index 000000000..725fc3005 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/src/db.rs @@ -0,0 +1,612 @@ +use crate::VectorDBError; +use crate::utils::{cosine_similarity, from_le_bytes_vec, to_le_bytes_vec}; +use rusqlite::{params, Connection, OptionalExtension}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fs; +use std::path::PathBuf; +use uuid::Uuid; + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FileMetadata { + pub name: Option<String>, + pub path: String, + #[serde(rename = "type")] + pub file_type: Option<String>, + pub size: Option<i64>, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct
ChunkMetadata { + pub file: FileMetadata, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct ChunkInput { + pub id: Option<String>, + pub text: String, + pub embedding: Vec<f32>, + pub metadata: Option<ChunkMetadata>, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct SearchResult { + pub id: String, + pub text: String, + pub score: Option<f32>, + pub file_id: String, + pub chunk_file_order: i64, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct AttachmentFileInfo { + pub id: String, + pub name: Option<String>, + pub path: Option<String>, + #[serde(rename = "type")] + pub file_type: Option<String>, + pub size: Option<i64>, + pub chunk_count: i64, +} + +// ============================================================================ +// Connection & Path Management +// ============================================================================ + +pub fn collection_path(base: &PathBuf, name: &str) -> PathBuf { + let mut p = base.clone(); + let clean = name.replace(['/', '\\'], "_"); + let filename = format!("{}.db", clean); + p.push(&filename); + p +} + +pub fn open_or_init_conn(path: &PathBuf) -> Result<Connection, VectorDBError> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).ok(); + } + let conn = Connection::open(path)?; + Ok(conn) +} + +// ============================================================================ +// SQLite-vec Extension Loading +// ============================================================================ + +pub fn try_load_sqlite_vec(conn: &Connection) -> bool { + // Check if vec0 module is already available + if conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS temp.temp_vec USING vec0(embedding float[1])", []).is_ok() { + let _ = conn.execute("DROP TABLE IF EXISTS temp.temp_vec", []); + return true; + } + + unsafe { + let _ = conn.load_extension_enable(); + } + + let paths = possible_sqlite_vec_paths(); + for p in paths { + unsafe { + if let Ok(_) = conn.load_extension(&p, Some("sqlite3_vec_init")) { + if conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS temp.temp_vec
USING vec0(embedding float[1])", []).is_ok() { + let _ = conn.execute("DROP TABLE IF EXISTS temp.temp_vec", []); + return true; + } + } + } + } + + false +} + +pub fn possible_sqlite_vec_paths() -> Vec<String> { + let mut paths = Vec::new(); + + // Dev paths + paths.push("./src-tauri/resources/bin/sqlite-vec".to_string()); + paths.push("./resources/bin/sqlite-vec".to_string()); + + // Exe-relative paths + if let Ok(exe) = std::env::current_exe() { + if let Some(dir) = exe.parent() { + let mut d = dir.to_path_buf(); + d.push("resources"); + d.push("bin"); + d.push("sqlite-vec"); + paths.push(d.to_string_lossy().to_string()); + } + + #[cfg(target_os = "macos")] + { + if let Some(mac_dir) = exe.parent().and_then(|p| p.parent()) { + let mut r = mac_dir.to_path_buf(); + r.push("Resources"); + r.push("bin"); + r.push("sqlite-vec"); + paths.push(r.to_string_lossy().to_string()); + } + } + } + paths +} + +pub fn ensure_vec_table(conn: &Connection, dimension: usize) -> bool { + if try_load_sqlite_vec(conn) { + let create = format!( + "CREATE VIRTUAL TABLE IF NOT EXISTS chunks_vec USING vec0(embedding float[{}])", + dimension + ); + match conn.execute(&create, []) { + Ok(_) => return true, + Err(e) => { + println!("[VectorDB] ✗ Failed to create chunks_vec: {}", e); + } + } + } + false +} + +// ============================================================================ +// Schema Creation +// ============================================================================ + +pub fn create_schema(conn: &Connection, dimension: usize) -> Result<bool, VectorDBError> { + // Files table + conn.execute( + "CREATE TABLE IF NOT EXISTS files ( + id TEXT PRIMARY KEY, + path TEXT UNIQUE NOT NULL, + name TEXT, + type TEXT, + size INTEGER, + chunk_count INTEGER DEFAULT 0 + )", + [], + )?; + + // Chunks table + conn.execute( + "CREATE TABLE IF NOT EXISTS chunks ( + id TEXT PRIMARY KEY, + text TEXT NOT NULL, + embedding BLOB NOT NULL, + file_id TEXT, + chunk_file_order INTEGER, + FOREIGN KEY (file_id) REFERENCES files(id)
+ )", + [], + )?; + + conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_id ON chunks(id)", [])?; + conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_file_id ON chunks(file_id)", [])?; + conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_file_order ON chunks(file_id, chunk_file_order)", [])?; + + // Try to create vec virtual table + let has_ann = ensure_vec_table(conn, dimension); + Ok(has_ann) +} + +// ============================================================================ +// Insert Operations +// ============================================================================ + +pub fn insert_chunks( + conn: &Connection, + chunks: Vec<ChunkInput>, + vec_loaded: bool, +) -> Result<(), VectorDBError> { + let tx = conn.unchecked_transaction()?; + + // Check if vec virtual table exists + let has_vec = if vec_loaded { + conn + .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='chunks_vec'") + .and_then(|mut s| s.query_row([], |r| r.get::<_, String>(0)).optional()) + .ok() + .flatten() + .is_some() + } else { + false + }; + + let mut file_id_cache: HashMap<String, String> = HashMap::new(); + let mut file_chunk_counters: HashMap<String, i64> = HashMap::new(); + + for ch in chunks.into_iter() { + let emb = to_le_bytes_vec(&ch.embedding); + + // Extract file info from metadata and get/create file_id + let mut file_id: Option<String> = None; + if let Some(ref meta) = ch.metadata { + let file_path = &meta.file.path; + + // Check cache first + if let Some(cached_id) = file_id_cache.get(file_path) { + file_id = Some(cached_id.clone()); + } else { + // Generate UUID for new file + let uuid = Uuid::new_v4().to_string(); + + // Insert or ignore if path already exists + tx.execute( + "INSERT OR IGNORE INTO files (id, path, name, type, size) VALUES (?1, ?2, ?3, ?4, ?5)", + params![ + &uuid, + &meta.file.path, + &meta.file.name, + &meta.file.file_type, + meta.file.size + ], + )?; + + // Get the actual id (either the one we just inserted or existing one) + let id: String = tx.query_row( + "SELECT id FROM
files WHERE path = ?1", + params![file_path], + |row| row.get(0), + )?; + file_id = Some(id.clone()); + file_id_cache.insert(file_path.clone(), id); + } + } + + // Get or initialize chunk order for this file + let chunk_order = if let Some(ref fid) = file_id { + let counter = file_chunk_counters.entry(fid.clone()).or_insert_with(|| { + // Get max existing order for this file + tx.query_row( + "SELECT COALESCE(MAX(chunk_file_order), -1) FROM chunks WHERE file_id = ?1", + params![fid], + |row| row.get::<_, i64>(0), + ).unwrap_or(-1) + }); + *counter += 1; + *counter + } else { + 0 + }; + + // Generate UUID for chunk if not provided + let chunk_id = ch.id.unwrap_or_else(|| Uuid::new_v4().to_string()); + + tx.execute( + "INSERT OR REPLACE INTO chunks (id, text, embedding, file_id, chunk_file_order) VALUES (?1, ?2, ?3, ?4, ?5)", + params![chunk_id, ch.text, emb, file_id, chunk_order], + )?; + + if has_vec { + let rowid: i64 = tx + .prepare("SELECT rowid FROM chunks WHERE id=?1")? + .query_row(params![chunk_id], |r| r.get(0))?; + let json_vec = serde_json::to_string(&ch.embedding).unwrap_or("[]".to_string()); + match tx.execute( + "INSERT OR REPLACE INTO chunks_vec(rowid, embedding) VALUES (?1, ?2)", + params![rowid, json_vec], + ) { + Ok(_) => {} + Err(e) => { + println!("[VectorDB] ✗ Failed to insert into chunks_vec: {}", e); + } + } + } + } + + // Update chunk_count for all affected files + for file_id in file_id_cache.values() { + let count: i64 = tx.query_row( + "SELECT COUNT(*) FROM chunks WHERE file_id = ?1", + params![file_id], + |row| row.get(0), + )?; + tx.execute( + "UPDATE files SET chunk_count = ?1 WHERE id = ?2", + params![count, file_id], + )?; + } + + tx.commit()?; + Ok(()) +} + +// ============================================================================ +// Search Operations +// ============================================================================ + +pub fn search_collection( + conn: &Connection, + query_embedding: &[f32], + limit: usize, + 
threshold: f32, + mode: Option<String>, + vec_loaded: bool, + file_ids: Option<Vec<String>>, +) -> Result<Vec<SearchResult>, VectorDBError> { + let has_vec = if vec_loaded { + conn + .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='chunks_vec'") + .and_then(|mut s| s.query_row([], |r| r.get::<_, String>(0)).optional()) + .ok() + .flatten() + .is_some() + } else { + false + }; + + let prefer_ann = match mode.as_deref() { + Some("ann") => true, + Some("linear") => false, + _ => true, // auto prefers ANN when available + }; + + if has_vec && prefer_ann { + search_ann(conn, query_embedding, limit, file_ids) + } else { + search_linear(conn, query_embedding, limit, threshold, file_ids) + } +} + +fn search_ann( + conn: &Connection, + query_embedding: &[f32], + limit: usize, + file_ids: Option<Vec<String>>, +) -> Result<Vec<SearchResult>, VectorDBError> { + let json_vec = serde_json::to_string(&query_embedding).unwrap_or("[]".to_string()); + + // Build query with optional file_id filtering + let query = if let Some(ref ids) = file_ids { + let placeholders = ids.iter().map(|_| "?").collect::<Vec<_>>().join(","); + format!( + "SELECT c.id, c.text, c.file_id, c.chunk_file_order, v.distance + FROM chunks_vec v + JOIN chunks c ON c.rowid = v.rowid + WHERE v.embedding MATCH ?1 AND k = ?2 AND c.file_id IN ({}) + ORDER BY v.distance", + placeholders + ) + } else { + "SELECT c.id, c.text, c.file_id, c.chunk_file_order, v.distance + FROM chunks_vec v + JOIN chunks c ON c.rowid = v.rowid + WHERE v.embedding MATCH ?1 AND k = ?2 + ORDER BY v.distance".to_string() + }; + + let mut stmt = match conn.prepare(&query) { + Ok(s) => s, + Err(e) => { + println!("[VectorDB] ✗ Failed to prepare ANN query: {}", e); + return Err(e.into()); + } + }; + + let mut rows = if let Some(ids) = file_ids { + let mut params: Vec<Box<dyn rusqlite::ToSql>> = vec![ + Box::new(json_vec), + Box::new(limit as i64), + ]; + for id in ids { + params.push(Box::new(id)); + } + let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + match stmt.query(&*param_refs)
{ + Ok(r) => r, + Err(e) => { + println!("[VectorDB] ✗ Failed to execute ANN query: {}", e); + return Err(e.into()); + } + } + } else { + match stmt.query(params![json_vec, limit as i64]) { + Ok(r) => r, + Err(e) => { + println!("[VectorDB] ✗ Failed to execute ANN query: {}", e); + return Err(e.into()); + } + } + }; + + let mut results = Vec::new(); + while let Some(row) = rows.next()? { + let id: String = row.get(0)?; + let text: String = row.get(1)?; + let file_id: String = row.get(2)?; + let chunk_file_order: i64 = row.get(3)?; + let distance: f32 = row.get(4)?; + + results.push(SearchResult { + id, + text, + score: Some(distance), + file_id, + chunk_file_order, + }); + } + + println!("[VectorDB] ANN search returned {} results", results.len()); + Ok(results) +} + +fn search_linear( + conn: &Connection, + query_embedding: &[f32], + limit: usize, + threshold: f32, + file_ids: Option<Vec<String>>, +) -> Result<Vec<SearchResult>, VectorDBError> { + let (query, params_vec): (String, Vec<Box<dyn rusqlite::ToSql>>) = if let Some(ids) = file_ids { + let placeholders = ids.iter().map(|_| "?").collect::<Vec<_>>().join(","); + let query_str = format!( + "SELECT c.id, c.text, c.embedding, c.file_id, c.chunk_file_order + FROM chunks c + WHERE c.file_id IN ({})", + placeholders + ); + let mut params: Vec<Box<dyn rusqlite::ToSql>> = Vec::new(); + for id in ids { + params.push(Box::new(id)); + } + (query_str, params) + } else { + ( + "SELECT c.id, c.text, c.embedding, c.file_id, c.chunk_file_order + FROM chunks c".to_string(), + Vec::new() + ) + }; + + let mut stmt = conn.prepare(&query)?; + let param_refs: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|p| p.as_ref()).collect(); + let mut rows = if param_refs.is_empty() { + stmt.query([])? + } else { + stmt.query(&*param_refs)? + }; + let mut results: Vec<SearchResult> = Vec::new(); + + while let Some(row) = rows.next()?
{ + let id: String = row.get(0)?; + let text: String = row.get(1)?; + let embedding_bytes: Vec<u8> = row.get(2)?; + let file_id: String = row.get(3)?; + let chunk_file_order: i64 = row.get(4)?; + + let emb = from_le_bytes_vec(&embedding_bytes); + let score = cosine_similarity(query_embedding, &emb)?; + + if score >= threshold { + results.push(SearchResult { + id, + text, + score: Some(score), + file_id, + chunk_file_order, + }); + } + } + + results.sort_by(|a, b| { + match (b.score, a.score) { + (Some(b_score), Some(a_score)) => b_score.partial_cmp(&a_score).unwrap_or(std::cmp::Ordering::Equal), + (Some(_), None) => std::cmp::Ordering::Less, + (None, Some(_)) => std::cmp::Ordering::Greater, + (None, None) => std::cmp::Ordering::Equal, + } + }); + let take: Vec<SearchResult> = results.into_iter().take(limit).collect(); + println!("[VectorDB] Linear search returned {} results", take.len()); + Ok(take) +} + +// ============================================================================ +// List Operations +// ============================================================================ + +pub fn list_attachments( + conn: &Connection, + limit: Option<usize>, +) -> Result<Vec<AttachmentFileInfo>, VectorDBError> { + let query = if let Some(lim) = limit { + format!("SELECT id, path, name, type, size, chunk_count FROM files LIMIT {}", lim) + } else { + "SELECT id, path, name, type, size, chunk_count FROM files".to_string() + }; + + let mut stmt = conn.prepare(&query)?; + let mut rows = stmt.query([])?; + let mut out = Vec::new(); + + while let Some(row) = rows.next()?
{ + let id: String = row.get(0)?; + let path: Option<String> = row.get(1)?; + let name: Option<String> = row.get(2)?; + let file_type: Option<String> = row.get(3)?; + let size: Option<i64> = row.get(4)?; + let chunk_count: i64 = row.get(5)?; + out.push(AttachmentFileInfo { + id, + name, + path, + file_type, + size, + chunk_count, + }); + } + + Ok(out) +} + +// ============================================================================ +// Delete Operations +// ============================================================================ + +pub fn delete_chunks(conn: &Connection, ids: Vec<String>) -> Result<(), VectorDBError> { + let tx = conn.unchecked_transaction()?; + for id in ids { + tx.execute("DELETE FROM chunks WHERE id = ?1", params![id])?; + } + tx.commit()?; + Ok(()) +} + +// ============================================================================ +// Get Chunks by Order +// ============================================================================ + +pub fn get_chunks( + conn: &Connection, + file_id: String, + start_order: i64, + end_order: i64, +) -> Result<Vec<SearchResult>, VectorDBError> { + let mut stmt = conn.prepare( + "SELECT id, text, chunk_file_order FROM chunks + WHERE file_id = ?1 AND chunk_file_order >= ?2 AND chunk_file_order <= ?3 + ORDER BY chunk_file_order" + )?; + let mut rows = stmt.query(params![&file_id, start_order, end_order])?; + + let mut results = Vec::new(); + while let Some(row) = rows.next()?
{ + results.push(SearchResult { + id: row.get(0)?, + text: row.get(1)?, + score: None, + file_id: file_id.clone(), + chunk_file_order: row.get(2)?, + }); + } + + Ok(results) +} + +// ============================================================================ +// Utility Operations +// ============================================================================ + +pub fn chunk_text(text: String, chunk_size: usize, chunk_overlap: usize) -> Vec<String> { + if chunk_size == 0 { + return vec![]; + } + + let mut chunks = Vec::new(); + let chars: Vec<char> = text.chars().collect(); + let mut start = 0usize; + + while start < chars.len() { + let end = (start + chunk_size).min(chars.len()); + let ch: String = chars[start..end].iter().collect(); + chunks.push(ch); + if end >= chars.len() { + break; + } + let advance = if chunk_overlap >= chunk_size { + 1 + } else { + chunk_size - chunk_overlap + }; + start += advance; + } + + chunks +} diff --git a/src-tauri/plugins/tauri-plugin-vector-db/src/error.rs b/src-tauri/plugins/tauri-plugin-vector-db/src/error.rs new file mode 100644 index 000000000..6c2fdcb3a --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/src/error.rs @@ -0,0 +1,23 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, thiserror::Error, Serialize, Deserialize)] +pub enum VectorDBError { + #[error("Database error: {0}")] + DatabaseError(String), + + #[error("Invalid input: {0}")] + InvalidInput(String), +} + +impl From<rusqlite::Error> for VectorDBError { + fn from(err: rusqlite::Error) -> Self { + VectorDBError::DatabaseError(err.to_string()) + } +} + +impl From<serde_json::Error> for VectorDBError { + fn from(err: serde_json::Error) -> Self { + VectorDBError::DatabaseError(err.to_string()) + } +} + diff --git a/src-tauri/plugins/tauri-plugin-vector-db/src/lib.rs b/src-tauri/plugins/tauri-plugin-vector-db/src/lib.rs new file mode 100644 index 000000000..227ab8b53 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/src/lib.rs @@ -0,0 +1,34 @@ +use tauri::{ + plugin::{Builder, 
TauriPlugin}, + Runtime, + Manager, +}; + +mod commands; +mod db; +mod error; +mod state; +mod utils; + +pub use error::VectorDBError; +pub use state::VectorDBState; + +pub fn init<R: Runtime>() -> TauriPlugin<R> { + Builder::new("vector-db") + .invoke_handler(tauri::generate_handler![ + commands::create_collection, + commands::insert_chunks, + commands::search_collection, + commands::delete_chunks, + commands::delete_collection, + commands::chunk_text, + commands::get_status, + commands::list_attachments, + commands::get_chunks, + ]) + .setup(|app, _api| { + app.manage(state::VectorDBState::new()); + Ok(()) + }) + .build() +} diff --git a/src-tauri/plugins/tauri-plugin-vector-db/src/state.rs b/src-tauri/plugins/tauri-plugin-vector-db/src/state.rs new file mode 100644 index 000000000..8813625e2 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/src/state.rs @@ -0,0 +1,17 @@ +use std::path::PathBuf; + +pub struct VectorDBState { + pub base_dir: PathBuf, +} + +impl VectorDBState { + pub fn new() -> Self { + // Default vector db path: <data_dir>/Jan/data/db + let mut base = dirs::data_dir().unwrap_or_else(|| PathBuf::from(".")); + base.push("Jan"); + base.push("data"); + base.push("db"); + std::fs::create_dir_all(&base).ok(); + Self { base_dir: base } + } +} diff --git a/src-tauri/plugins/tauri-plugin-vector-db/src/utils.rs b/src-tauri/plugins/tauri-plugin-vector-db/src/utils.rs new file mode 100644 index 000000000..be0b54796 --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/src/utils.rs @@ -0,0 +1,27 @@ +use crate::VectorDBError; + +pub fn cosine_similarity(a: &[f32], b: &[f32]) -> Result<f32, VectorDBError> { + if a.len() != b.len() { + return Err(VectorDBError::InvalidInput( + "Vector dimensions don't match".to_string(), + )); + } + + let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); + let mag_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt(); + let mag_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt(); + if mag_a == 0.0 || mag_b == 0.0 { return Ok(0.0); } + Ok(dot / (mag_a * 
mag_b)) +} + +pub fn to_le_bytes_vec(v: &[f32]) -> Vec<u8> { + v.iter().flat_map(|f| f.to_le_bytes()).collect::<Vec<u8>>() +} + +pub fn from_le_bytes_vec(bytes: &[u8]) -> Vec<f32> { + bytes + .chunks_exact(4) + .map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]])) + .collect::<Vec<f32>>() +} + diff --git a/src-tauri/plugins/tauri-plugin-vector-db/tsconfig.json b/src-tauri/plugins/tauri-plugin-vector-db/tsconfig.json new file mode 100644 index 000000000..60bc6a8eb --- /dev/null +++ b/src-tauri/plugins/tauri-plugin-vector-db/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "es2021", + "module": "esnext", + "moduleResolution": "bundler", + "skipLibCheck": true, + "strict": true, + "noUnusedLocals": true, + "noImplicitAny": true, + "noEmit": true + }, + "include": ["guest-js/*.ts"], + "exclude": ["dist-js", "node_modules"] +} + diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index a232e11f5..d3f5898c5 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -31,7 +31,9 @@ pub fn run() { .plugin(tauri_plugin_http::init()) .plugin(tauri_plugin_store::Builder::new().build()) .plugin(tauri_plugin_shell::init()) - .plugin(tauri_plugin_llamacpp::init()); + .plugin(tauri_plugin_llamacpp::init()) + .plugin(tauri_plugin_vector_db::init()) + .plugin(tauri_plugin_rag::init()); #[cfg(feature = "deep-link")] { diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index 95bdc9b39..bddbbfa86 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -21,6 +21,8 @@ import { IconCodeCircle2, IconPlayerStopFilled, IconX, + IconPaperclip, + IconLoader2, } from '@tabler/icons-react' import { useTranslation } from '@/i18n/react-i18next-compat' import { useGeneralSetting } from '@/hooks/useGeneralSetting' @@ -38,8 +40,13 @@ import { TokenCounter } from '@/components/TokenCounter' import { useMessages } from '@/hooks/useMessages' import { useShallow } from 'zustand/react/shallow' import { 
McpExtensionToolLoader } from './McpExtensionToolLoader' -import { ExtensionTypeEnum, MCPExtension } from '@janhq/core' +import { ExtensionTypeEnum, MCPExtension, RAGExtension } from '@janhq/core' import { ExtensionManager } from '@/lib/extension' +import { useAttachments } from '@/hooks/useAttachments' +import { open } from '@tauri-apps/plugin-dialog' +import { toast } from 'sonner' +import { PlatformFeatures } from '@/lib/platform/const' +import { PlatformFeature } from '@/lib/platform/types' type ChatInputProps = { className?: string @@ -100,10 +107,24 @@ const ChatInput = ({ dataUrl: string }> >([]) + // Document attachments (desktop RAG ingestion). We only index on send. + const [docFiles, setDocFiles] = useState< + Array<{ + name: string + path: string + size?: number + type?: string + }> + >([]) const [connectedServers, setConnectedServers] = useState([]) const [isDragOver, setIsDragOver] = useState(false) const [hasMmproj, setHasMmproj] = useState(false) const [hasActiveModels, setHasActiveModels] = useState(false) + const attachmentsEnabled = useAttachments((s) => s.enabled) + const [ingestingDocs, setIngestingDocs] = useState(false) + // Determine whether to show the Attach documents button (simple gating) + const showAttachmentButton = + attachmentsEnabled && PlatformFeatures[PlatformFeature.ATTACHMENTS] // Check for connected MCP servers useEffect(() => { @@ -182,9 +203,36 @@ const ChatInput = ({ setMessage('Please select a model to start chatting.') return } - if (!prompt.trim() && uploadedFiles.length === 0) { + if (!prompt.trim() && uploadedFiles.length === 0 && docFiles.length === 0) { return } + // If we have pending doc files, index them first + if (docFiles.length > 0) { + try { + setIngestingDocs(true) + const rag = extensionManager.get(ExtensionTypeEnum.RAG) + if (!rag?.ingestAttachments) throw new Error('Retrieval extension not available') + for (const f of docFiles) { + const id = (toast as any).loading + ? 
(toast as any).loading(`Indexing ${f.name || f.path}…`) + : undefined + try { + await rag.ingestAttachments(currentThreadId!, [{ path: f.path, name: f.name }]) + if (id) toast.success(`Indexed ${f.name || f.path}`, { id }) + } catch (err) { + if (id) toast.error(`Failed to index ${f.name || f.path}`, { id }) + throw err + } + } + setDocFiles([]) + } catch (err) { + const desc = err instanceof Error ? err.message : String(err) + toast.error('Failed to index attachments', { description: desc }) + setIngestingDocs(false) + return + } + setIngestingDocs(false) + } setMessage('') sendMessage( prompt, @@ -261,6 +309,42 @@ const ChatInput = ({ fileInputRef.current?.click() } + const handleAttachDocsIngest = async () => { + try { + if (!attachmentsEnabled) { + toast.info('Attachments are disabled in Settings') + return + } + if (!currentThreadId) { + toast.info('Please start a thread first to attach documents.') + return + } + const selection = await open({ + multiple: true, + filters: [ + { + name: 'Documents', + extensions: ['pdf', 'docx', 'txt', 'md', 'csv', 'xlsx', 'xls', 'ods', 'pptx', 'html', 'htm'], + }, + ], + }) + if (!selection) return + const paths = Array.isArray(selection) ? selection : [selection] + if (!paths.length) return + setDocFiles((prev) => [ + ...prev, + ...paths.map((p) => ({ + path: p, + name: p.split(/[\\/]/).pop() || p, + })), + ]) + } catch (e) { + console.error('Failed to ingest attachments:', e) + const desc = e instanceof Error ? e.message : String(e) + toast.error('Failed to attach documents', { description: desc }) + } + } + const handleRemoveFile = (indexToRemove: number) => { setUploadedFiles((prev) => prev.filter((_, index) => index !== indexToRemove) @@ -560,7 +644,7 @@ const ChatInput = ({ onDragOver={hasMmproj ? handleDragOver : undefined} onDrop={hasMmproj ? handleDrop : undefined} > - {uploadedFiles.length > 0 && ( + {(uploadedFiles.length > 0 || docFiles.length > 0) && (
{uploadedFiles.map((file, index) => { return ( @@ -587,6 +671,27 @@ const ChatInput = ({
) })} + {docFiles.map((file, index) => ( +
+ + + {file.name} + +
+ setDocFiles((prev) => + prev.filter((_, i) => i !== index) + ) + } + > + +
+
+ ))} )} )} - {/* File attachment - show only for models with mmproj */} + {/* Vision image attachment - show only for models with mmproj */} {hasMmproj && ( @@ -680,6 +785,39 @@ const ChatInput = ({ )} + {/* RAG document attachments - desktop-only via dialog; shown when feature enabled */} + {selectedModel?.capabilities?.includes('tools') && + showAttachmentButton && ( + + + +
+ {ingestingDocs ? ( + + ) : ( + + )} +
+
+ +

+ {ingestingDocs + ? 'Indexing documents…' + : 'Attach documents'} +

+
+
+
+ )} {/* Microphone - always available - Temp Hide */} {/*
@@ -703,74 +841,75 @@ const ChatInput = ({ )} {selectedModel?.capabilities?.includes('tools') && - hasActiveMCPServers && ( - MCPToolComponent ? ( - // Use custom MCP component - - ) : ( - // Use default tools dropdown - - + ) : ( + // Use default tools dropdown + + + - { + setDropdownToolsAvailable(false) + e.stopPropagation() + }} > -
{ - setDropdownToolsAvailable(false) - e.stopPropagation() + { + setDropdownToolsAvailable(isOpen) + if (isOpen) { + setTooltipToolsAvailable(false) + } }} > - { - setDropdownToolsAvailable(isOpen) - if (isOpen) { - setTooltipToolsAvailable(false) - } - }} - > - {(isOpen, toolsCount) => { - return ( -
- - {toolsCount > 0 && ( -
- - {toolsCount > 99 ? '99+' : toolsCount} - -
- )} -
- ) - }} -
-
-
- -

{t('tools')}

-
-
-
- ) - )} + {(isOpen, toolsCount) => { + return ( +
+ + {toolsCount > 0 && ( +
+ + {toolsCount > 99 ? '99+' : toolsCount} + +
+ )} +
+ ) + }} + +
+ + +

{t('tools')}

+
+ + + ))} {selectedModel?.capabilities?.includes('web_search') && ( @@ -836,16 +975,23 @@ const ChatInput = ({ ) : (