From d9e32dcc024f0322dbd941a7b78cafc05aadff12 Mon Sep 17 00:00:00 2001 From: Louis <133622055+louis-jan@users.noreply.github.com> Date: Fri, 20 Oct 2023 21:03:46 +0700 Subject: [PATCH] feat: chat with documents plugin (#417) * chore: vectordb driver plugin # Conflicts: # plugins/monitoring-plugin/package.json * chore: add langchain & index documents * feat: chat with documents plugin * chore: correct build step --------- Co-authored-by: namvuong <22463238+vuonghoainam@users.noreply.github.com> --- plugins/model-management-plugin/index.ts | 9 - plugins/retrieval-plugin/@types/global.d.ts | 2 + plugins/retrieval-plugin/README.md | 73 +++++++++ plugins/retrieval-plugin/package.json | 43 +++++ plugins/retrieval-plugin/src/index.ts | 172 ++++++++++++++++++++ plugins/retrieval-plugin/src/module.ts | 58 +++++++ plugins/retrieval-plugin/tsconfig.json | 13 ++ plugins/retrieval-plugin/webpack.config.js | 35 ++++ web/app/_components/Preferences.tsx | 1 + 9 files changed, 397 insertions(+), 9 deletions(-) create mode 100644 plugins/retrieval-plugin/@types/global.d.ts create mode 100644 plugins/retrieval-plugin/README.md create mode 100644 plugins/retrieval-plugin/package.json create mode 100644 plugins/retrieval-plugin/src/index.ts create mode 100644 plugins/retrieval-plugin/src/module.ts create mode 100644 plugins/retrieval-plugin/tsconfig.json create mode 100644 plugins/retrieval-plugin/webpack.config.js diff --git a/plugins/model-management-plugin/index.ts b/plugins/model-management-plugin/index.ts index ea1ec4391..1d6e037f4 100644 --- a/plugins/model-management-plugin/index.ts +++ b/plugins/model-management-plugin/index.ts @@ -47,15 +47,6 @@ function updateFinishedDownloadAt(_id: string): Promise { return store.updateMany("models", { _id }, { time: Date.now(), finishDownloadAt: 1 }); } -/** - * Retrieves all unfinished models from the database. - * - * @returns A promise that resolves with an array of unfinished models. 
- */ -function getUnfinishedDownloadModels(): Promise { - return store.findMany("models", { finishDownloadAt: -1 }, [{ startDownloadAt: "desc" }]); -} - /** * Retrieves all finished models from the database. * diff --git a/plugins/retrieval-plugin/@types/global.d.ts b/plugins/retrieval-plugin/@types/global.d.ts new file mode 100644 index 000000000..87105f099 --- /dev/null +++ b/plugins/retrieval-plugin/@types/global.d.ts @@ -0,0 +1,2 @@ +declare const PLUGIN_NAME: string; +declare const MODULE_PATH: string; diff --git a/plugins/retrieval-plugin/README.md b/plugins/retrieval-plugin/README.md new file mode 100644 index 000000000..ae70eb4ec --- /dev/null +++ b/plugins/retrieval-plugin/README.md @@ -0,0 +1,73 @@ +# Create a Jan Plugin using Typescript + +Use this template to bootstrap the creation of a TypeScript Jan plugin. 🚀 + +## Create Your Own Plugin + +To create your own plugin, you can use this repository as a template! Just follow the below instructions: + +1. Click the Use this template button at the top of the repository +2. Select Create a new repository +3. Select an owner and name for your new repository +4. Click Create repository +5. Clone your new repository + +## Initial Setup + +After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your plugin. + +> [!NOTE] +> +> You'll need to have a reasonably modern version of +> [Node.js](https://nodejs.org) handy. If you are using a version manager like +> [`nodenv`](https://github.com/nodenv/nodenv) or +> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the +> root of your repository to install the version specified in +> [`package.json`](./package.json). Otherwise, 20.x or later should work! + +1. :hammer_and_wrench: Install the dependencies + + ```bash + npm install + ``` + +1. :building_construction: Package the TypeScript for distribution + + ```bash + npm run bundle + ``` + +1. 
:white_check_mark: Check your artifact + + There will be a tgz file in your plugin directory now + +## Update the Plugin Metadata + +The [`package.json`](package.json) file defines metadata about your plugin, such as +plugin name, main entry, description and version. + +When you copy this repository, update `package.json` with the name, description for your plugin. + +## Update the Plugin Code + +The [`src/`](./src/) directory is the heart of your plugin! This contains the +source code that will be run when your plugin extension functions are invoked. You can replace the +contents of this directory with your own code. + +There are a few things to keep in mind when writing your plugin code: + +- Most Jan Plugin Extension functions are processed asynchronously. + In `index.ts`, you will see that the extension function will return a `Promise`. + + ```typescript + import { core } from "@janhq/core"; + + function onStart(): Promise { + return core.invokePluginFunc(MODULE_PATH, "run", 0); + } + ``` + + For more information about the Jan Plugin Core module, see the + [documentation](https://github.com/janhq/jan/blob/main/core/README.md). + +So, what are you waiting for? Go ahead and start customizing your plugin! diff --git a/plugins/retrieval-plugin/package.json b/plugins/retrieval-plugin/package.json new file mode 100644 index 000000000..2d2a89bed --- /dev/null +++ b/plugins/retrieval-plugin/package.json @@ -0,0 +1,43 @@ +{ + "name": "retrieval-plugin", + "version": "1.0.0", + "description": "Retrieval plugin for Jan app", + "icon": "https://raw.githubusercontent.com/tailwindlabs/heroicons/88e98b0c2b458553fbadccddc2d2f878edc0387b/src/20/solid/circle-stack.svg", + "main": "dist/index.js", + "module": "dist/module.js", + "author": "Jan ", + "license": "MIT", + "activationPoints": [ + "init" + ], + "scripts": { + "build": "tsc -b . 
&& webpack --config webpack.config.js",
+    "bundle": "npm pack"
+  },
+  "devDependencies": {
+    "webpack": "^5.88.2",
+    "webpack-cli": "^5.1.4"
+  },
+  "dependencies": {
+    "@janhq/core": "^0.1.1",
+    "faiss-node": "^0.5.1",
+    "install": "^0.13.0",
+    "langchain": "^0.0.169",
+    "npm": "^10.2.0",
+    "pdf-parse": "^1.1.1",
+    "ts-loader": "^9.5.0"
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "files": [
+    "dist/*",
+    "package.json",
+    "README.md"
+  ],
+  "bundleDependencies": [
+    "pdf-parse",
+    "langchain",
+    "faiss-node"
+  ]
+}
diff --git a/plugins/retrieval-plugin/src/index.ts b/plugins/retrieval-plugin/src/index.ts
new file mode 100644
index 000000000..ca8b9daae
--- /dev/null
+++ b/plugins/retrieval-plugin/src/index.ts
@@ -0,0 +1,172 @@
+/**
+ * The entrypoint for the plugin.
+ */
+
+import {
+  EventName,
+  NewMessageRequest,
+  PluginService,
+  RegisterExtensionPoint,
+  invokePluginFunc,
+  events,
+  preferences,
+  store,
+} from "@janhq/core";
+
+/**
+ * Subscribes `inferenceRequest` to `EventName.OnNewMessageRequest` events.
+ */
+const registerListener = () => {
+  events.on(EventName.OnNewMessageRequest, inferenceRequest);
+};
+
+/**
+ * Plugin start hook: registers the message listener and starts document ingestion.
+ * Ingestion is not awaited; a rejection from `ingest()` is logged instead of going unhandled.
+ * @returns A promise that resolves immediately, without waiting for ingestion to finish.
+ */
+function onStart(): Promise<void> {
+  registerListener();
+  ingest().catch((err) => console.error("Document ingestion failed on start:", err));
+  return Promise.resolve();
+}
+
+/**
+ * Retrieves the document ingestion directory path from the `preferences` module and invokes the `ingest` function
+ * from the specified module with the directory path and additional options.
+ * The additional options are retrieved from the `preferences` module using the `PLUGIN_NAME` constant.
+ */
+async function ingest() {
+  const path = await preferences.get(PLUGIN_NAME, "ingestDocumentDirectoryPath");
+  // No directory configured yet: nothing to ingest.
+  if (!path || path.length === 0) return;
+
+  // TODO: Hiro - Add support for custom embeddings
+  const customizedEmbedding = undefined;
+  try {
+    const config = await loadOpenAIConfig("azureOpenAIApiDeploymentNameRag");
+    // Await the module call so a failed ingestion is logged instead of rejecting unobserved.
+    await invokePluginFunc(MODULE_PATH, "ingest", path, customizedEmbedding, config);
+  } catch (err) {
+    console.error("Failed to ingest documents:", err);
+  }
+}
+
+/**
+ * Reads the OpenAI / Azure OpenAI settings used by both ingestion and chat from the preferences.
+ * The OpenAI key, Azure instance name and Azure base path become `undefined` when empty.
+ * @param deploymentKey - Name of the preference holding the Azure deployment name to use.
+ */
+async function loadOpenAIConfig(deploymentKey: string) {
+  const orUndefined = (value?: string) => (value && value.length > 0 ? value : undefined);
+  return {
+    openAIApiKey: orUndefined(await preferences.get(PLUGIN_NAME, "openAIApiKey")),
+    azureOpenAIApiKey: await preferences.get(PLUGIN_NAME, "azureOpenAIApiKey"),
+    azureOpenAIApiVersion: await preferences.get(PLUGIN_NAME, "azureOpenAIApiVersion"),
+    azureOpenAIApiInstanceName: orUndefined(await preferences.get(PLUGIN_NAME, "azureOpenAIApiInstanceName")),
+    azureOpenAIApiDeploymentName: await preferences.get(PLUGIN_NAME, deploymentKey),
+    azureOpenAIBasePath: orUndefined(await preferences.get(PLUGIN_NAME, "azureOpenAIBasePath")),
+  };
+}
+
+/**
+ * Answers a new chat message from the ingested documents: stores and announces an empty "RAG"
+ * reply, asks the module's `chatWithDocs` for an answer, then publishes it as an update.
+ * @param data - The incoming message request; `data.message` is used as the question.
+ */
+async function inferenceRequest(data: NewMessageRequest): Promise<void> {
+  // TODO: Hiro - Add support for custom LLMs
+  const customLLM = undefined;
+  const message = { ...data, message: "", user: "RAG", createdAt: new Date().toISOString(), _id: undefined };
+  message._id = await store.insertOne("messages", message);
+  events.emit(EventName.OnNewMessageResponse, message);
+
+  try {
+    const text = await invokePluginFunc(MODULE_PATH, "chatWithDocs", data.message, customLLM, {
+      ...(await loadOpenAIConfig("azureOpenAIApiDeploymentNameChat")),
+      modelName: "gpt-3.5-turbo-16k",
+      temperature: 0.2,
+    });
+    console.log("RAG Response:", text);
+    message.message = text;
+    events.emit(EventName.OnMessageResponseUpdate, message);
+  } catch (err) {
+    // Log the failure; the placeholder reply announced above keeps its empty text.
+    console.error("RAG request failed:", err);
+  }
+}
+/**
+ * Initializes the plugin by registering the extension functions with the given register function.
+ * @param {Function} options.register - The function to use for registering the extension functions
+ */
+export function init({ register }: { register: RegisterExtensionPoint }) {
+  register(PluginService.OnStart, PLUGIN_NAME, onStart);
+  register(PluginService.OnPreferencesUpdate, PLUGIN_NAME, ingest);
+
+  preferences.registerPreferences(
+    register,
+    PLUGIN_NAME,
+    "ingestDocumentDirectoryPath",
+    "Document Ingest Directory Path",
+    "The path of the directory containing the documents to ingest",
+    undefined
+  );
+
+  preferences.registerPreferences(
+    register,
+    PLUGIN_NAME,
+    "openAIApiKey",
+    "OpenAI API Key",
+    "OpenAI API Key",
+    undefined
+  );
+
+  preferences.registerPreferences(
+    register,
+    PLUGIN_NAME,
+    "azureOpenAIApiKey",
+    "Azure API Key",
+    "Azure Project API Key",
+    undefined
+  );
+  preferences.registerPreferences(
+    register,
+    PLUGIN_NAME,
+    "azureOpenAIApiVersion",
+    "Azure API Version",
+    "Azure Project API Version",
+    undefined
+  );
+  preferences.registerPreferences(
+    register,
+    PLUGIN_NAME,
+    "azureOpenAIApiInstanceName",
+    "Azure Instance Name",
+    "Azure Project Instance Name",
+    undefined
+  );
+  preferences.registerPreferences(
+    register,
+    PLUGIN_NAME,
+    "azureOpenAIApiDeploymentNameChat",
+    "Azure Chat Model Deployment Name",
+    "Azure Project Chat Model Deployment Name (e.g. gpt-3.5-turbo-16k)",
+    undefined
+  );
+  preferences.registerPreferences(
+    register,
+    PLUGIN_NAME,
+    "azureOpenAIApiDeploymentNameRag",
+    "Azure Text Embedding Model Deployment Name",
+    "Azure Project Text Embedding Model Deployment Name (e.g. text-embedding-ada-002)",
+    undefined
+  );
+  preferences.registerPreferences(
+    register,
+    PLUGIN_NAME,
+    "azureOpenAIBasePath",
+    "Azure Base Path",
+    "Azure Project Base Path",
+    undefined
+  );
+}
diff --git a/plugins/retrieval-plugin/src/module.ts b/plugins/retrieval-plugin/src/module.ts
new file mode 100644
index 000000000..dbbd5bd0f
--- /dev/null
+++ b/plugins/retrieval-plugin/src/module.ts
@@ -0,0 +1,58 @@
+const path = require("path");
+const { app } = require("electron");
+const { DirectoryLoader } = require("langchain/document_loaders/fs/directory");
+const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
+const { PDFLoader } = require("langchain/document_loaders/fs/pdf");
+const { CharacterTextSplitter } = require("langchain/text_splitter");
+const { FaissStore } = require("langchain/vectorstores/faiss");
+const { ChatOpenAI } = require("langchain/chat_models/openai");
+const { RetrievalQAChain } = require("langchain/chains");
+
+// Vector store built by `ingest`; stays undefined until an ingestion has completed.
+var db: any | undefined = undefined;
+
+/**
+ * Ingests documents from the specified directory
+ * If an `embedding` object is not provided, uses OpenAIEmbeddings.
+ * The resulting embeddings are stored in the database using Faiss.
+ * @param docDir - The directory containing the documents to ingest.
+ * @param embedding - An optional object used to generate embeddings for the documents.
+ * @param config - An optional configuration object used to create a new `OpenAIEmbeddings` object.
+ */
+async function ingest(docDir: string, embedding?: any, config?: any) {
+  const loader = new DirectoryLoader(docDir, {
+    // Only `.pdf` files have a loader registered here.
+    ".pdf": (filePath) => new PDFLoader(filePath),
+  });
+  const docs = await loader.load();
+  const textSplitter = new CharacterTextSplitter();
+  const docsQA = await textSplitter.splitDocuments(docs);
+  const embeddings = embedding ?? new OpenAIEmbeddings({ ...config });
+  db = await FaissStore.fromDocuments(docsQA, embeddings);
+  console.log("Documents are ingested");
+}
+
+/**
+ * Generates an answer to a given question using the specified `llm` or a new `ChatOpenAI`.
+ * The function uses the `RetrievalQAChain` class to retrieve the most relevant document from the database and generate an answer.
+ * @param question - The question to generate an answer for.
+ * @param llm - An optional object used to generate the answer.
+ * @param config - An optional configuration object used to create a new `ChatOpenAI` object, can be ignored if llm is specified.
+ * @returns A Promise that resolves with the generated answer.
+ * @throws Error when no documents have been ingested yet, so there is no store to query.
+ */
+async function chatWithDocs(question: string, llm?: any, config?: any): Promise<string> {
+  if (!db) {
+    throw new Error("No documents have been ingested yet; nothing to answer from.");
+  }
+  const llm_question_answer = llm ?? new ChatOpenAI({ temperature: 0.2, ...config });
+  const qa = RetrievalQAChain.fromLLM(llm_question_answer, db.asRetriever(), {
+    verbose: true,
+  });
+  return qa.run(question);
+}
+
+module.exports = {
+  ingest,
+  chatWithDocs,
+};
diff --git a/plugins/retrieval-plugin/tsconfig.json b/plugins/retrieval-plugin/tsconfig.json
new file mode 100644
index 000000000..73b76094a
--- /dev/null
+++ b/plugins/retrieval-plugin/tsconfig.json
@@ -0,0 +1,13 @@
+{
+  "compilerOptions": {
+    "target": "es2016",
+    "module": "ES6",
+    "moduleResolution": "node",
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "esModuleInterop": true,
+    "forceConsistentCasingInFileNames": true,
+    "strict": false,
+    "skipLibCheck": true
+  }
+}
diff --git a/plugins/retrieval-plugin/webpack.config.js b/plugins/retrieval-plugin/webpack.config.js
new file mode 100644
index 000000000..0f78724bb
--- /dev/null
+++ b/plugins/retrieval-plugin/webpack.config.js
@@ -0,0 +1,35 @@
+const path = require("path");
+const webpack = require("webpack");
+const packageJson = require("./package.json");
+
+module.exports = {
+  experiments: { outputModule: true },
+  entry:
"./src/index.ts", + mode: "production", + module: { + rules: [ + { + test: /\.tsx?$/, + use: "ts-loader", + exclude: /node_modules/, + }, + ], + }, + plugins: [ + new webpack.DefinePlugin({ + PLUGIN_NAME: JSON.stringify(packageJson.name), + MODULE_PATH: JSON.stringify(`${packageJson.name}/${packageJson.module}`), + }), + ], + output: { + filename: "index.js", + path: path.resolve(__dirname, "dist"), + library: { type: "module" }, + }, + resolve: { + extensions: [".ts", ".js"], + }, + optimization: { + minimize: false, + }, +}; diff --git a/web/app/_components/Preferences.tsx b/web/app/_components/Preferences.tsx index 021f6d2f8..18f864ac1 100644 --- a/web/app/_components/Preferences.tsx +++ b/web/app/_components/Preferences.tsx @@ -149,6 +149,7 @@ export const Preferences = () => { if (timeout) { clearTimeout(timeout) } + if(extensionPoints.get(PluginService.OnPreferencesUpdate)) timeout = setTimeout(() => execute(PluginService.OnPreferencesUpdate), 100) }