feat: chat with documents plugin (#417)

* chore: vectordb driver plugin # Conflicts: # plugins/monitoring-plugin/package.json * chore: add langchain & index documents * feat: chat with documents plugin * chore: correct build step --------- Co-authored-by: namvuong <22463238+vuonghoainam@users.noreply.github.com>
2023-10-20 21:03:46 +07:00 · 2023-10-20 21:03:46 +07:00 · d9e32dcc02
commit d9e32dcc02
parent 0f5ef6473b
9 changed files with 397 additions and 9 deletions
--- a/plugins/model-management-plugin/index.ts
+++ b/plugins/model-management-plugin/index.ts
@ -47,15 +47,6 @@ function updateFinishedDownloadAt(_id: string): Promise<any> {
  return store.updateMany("models", { _id }, { time: Date.now(), finishDownloadAt: 1 });
 }

-/**
- * Retrieves all unfinished models from the database.
- *
- * @returns A promise that resolves with an array of unfinished models.
- */
-function getUnfinishedDownloadModels(): Promise<any> {
-  return store.findMany("models", { finishDownloadAt: -1 }, [{ startDownloadAt: "desc" }]);
-}
-
 /**
 * Retrieves all finished models from the database.
 *
--- a/plugins/retrieval-plugin/@types/global.d.ts
+++ b/plugins/retrieval-plugin/@types/global.d.ts
@ -0,0 +1,2 @@
+// Build-time constants substituted by webpack's DefinePlugin (see webpack.config.js).
+/** The plugin's package name (`name` in package.json). */
+declare const PLUGIN_NAME: string;
+/** `<package name>/<package.json "module" entry>`; passed to `invokePluginFunc` to address the plugin module. */
+declare const MODULE_PATH: string;
--- a/plugins/retrieval-plugin/README.md
+++ b/plugins/retrieval-plugin/README.md
@ -0,0 +1,73 @@
+# Create a Jan Plugin using TypeScript
+
+Use this template to bootstrap the creation of a TypeScript Jan plugin. 🚀
+
+## Create Your Own Plugin
+
+To create your own plugin, you can use this repository as a template! Just follow the instructions below:
+
+1. Click the Use this template button at the top of the repository
+2. Select Create a new repository
+3. Select an owner and name for your new repository
+4. Click Create repository
+5. Clone your new repository
+
+## Initial Setup
+
+After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your plugin.
+
+> [!NOTE]
+>
+> You'll need to have a reasonably modern version of
+> [Node.js](https://nodejs.org) handy. If you are using a version manager like
+> [`nodenv`](https://github.com/nodenv/nodenv) or
+> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
+> root of your repository to install the version specified in
+> [`package.json`](./package.json). Otherwise, 20.x or later should work!
+
+1. :hammer_and_wrench: Install the dependencies
+
+   ```bash
+   npm install
+   ```
+
+1. :building_construction: Build and package the TypeScript for distribution
+
+   ```bash
+   npm run build
+   npm run bundle
+   ```
+
+1. :white_check_mark: Check your artifact
+
+   There will be a tgz file in your plugin directory now
+
+## Update the Plugin Metadata
+
+The [`package.json`](package.json) file defines metadata about your plugin, such as
+plugin name, main entry, description and version.
+
+When you copy this repository, update `package.json` with the name and description of your plugin.
+
+## Update the Plugin Code
+
+The [`src/`](./src/) directory is the heart of your plugin! This contains the
+source code that will be run when your plugin extension functions are invoked. You can replace the
+contents of this directory with your own code.
+
+There are a few things to keep in mind when writing your plugin code:
+
+- Most Jan Plugin Extension functions are processed asynchronously.
+  In `index.ts`, you will see that the extension function will return a `Promise<any>`.
+
+  ```typescript
+  import { core } from "@janhq/core";
+
+  function onStart(): Promise<any> {
+    return core.invokePluginFunc(MODULE_PATH, "run", 0);
+  }
+  ```
+
+  For more information about the Jan Plugin Core module, see the
+  [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
+
+So, what are you waiting for? Go ahead and start customizing your plugin!
--- a/plugins/retrieval-plugin/package.json
+++ b/plugins/retrieval-plugin/package.json
@ -0,0 +1,43 @@
+{
+  "name": "retrieval-plugin",
+  "version": "1.0.0",
+  "description": "Retrieval plugin for Jan app",
+  "icon": "https://raw.githubusercontent.com/tailwindlabs/heroicons/88e98b0c2b458553fbadccddc2d2f878edc0387b/src/20/solid/circle-stack.svg",
+  "main": "dist/index.js",
+  "module": "dist/module.js",
+  "author": "Jan <service@jan.ai>",
+  "license": "MIT",
+  "activationPoints": [
+    "init"
+  ],
+  "scripts": {
+    "build": "tsc -b . && webpack --config webpack.config.js",
+    "bundle": "npm pack"
+  },
+  "devDependencies": {
+    "webpack": "^5.88.2",
+    "webpack-cli": "^5.1.4"
+  },
+  "dependencies": {
+    "@janhq/core": "^0.1.1",
+    "faiss-node": "^0.5.1",
+    "install": "^0.13.0",
+    "langchain": "^0.0.169",
+    "npm": "^10.2.0",
+    "pdf-parse": "^1.1.1",
+    "ts-loader": "^9.5.0"
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "files": [
+    "dist/*",
+    "package.json",
+    "README.md"
+  ],
+  "bundleDependencies": [
+    "pdf-parse",
+    "langchain",
+    "faiss-node"
+  ]
+}
--- a/plugins/retrieval-plugin/src/index.ts
+++ b/plugins/retrieval-plugin/src/index.ts
@ -0,0 +1,172 @@
+/**
+ * The entrypoint for the plugin.
+ */
+
+import {
+  EventName,
+  NewMessageRequest,
+  PluginService,
+  RegisterExtensionPoint,
+  invokePluginFunc,
+  events,
+  preferences,
+  store,
+} from "@janhq/core";
+
+/**
+ * Subscribes `inferenceRequest` as the handler for `EventName.OnNewMessageRequest` events.
+ */
+const registerListener = () => {
+  events.on(EventName.OnNewMessageRequest, inferenceRequest);
+};
+
+/**
+ * Plugin start hook: registers the new-message listener and kicks off document ingestion.
+ * Ingestion is started in the background and is not awaited, so the returned promise
+ * resolves immediately rather than when ingestion finishes.
+ * @returns A promise that resolves once the listener is registered and ingestion has been started.
+ */
+function onStart(): Promise<void> {
+  registerListener();
+  // Fire-and-forget: log a failure instead of leaving an unhandled promise rejection.
+  ingest().catch((error) => console.error("Failed to start document ingestion:", error));
+  return Promise.resolve();
+}
+
+/**
+ * Reads the `ingestDocumentDirectoryPath` preference and, when it is set to a non-empty value,
+ * invokes the `ingest` function of the plugin module with that directory and the
+ * OpenAI / Azure OpenAI options stored under `PLUGIN_NAME` in the `preferences` module.
+ * Does nothing when no directory is configured.
+ * The module call is started but not awaited; a failure is logged to the console.
+ */
+async function ingest() {
+  const path = await preferences.get(PLUGIN_NAME, "ingestDocumentDirectoryPath");
+
+  // TODO: Hiro - Add support for custom embeddings
+  const customizedEmbedding = undefined;
+
+  if (path && path.length > 0) {
+    const openAPIKey = await preferences.get(PLUGIN_NAME, "openAIApiKey");
+    const azureOpenAIBasePath = await preferences.get(PLUGIN_NAME, "azureOpenAIBasePath");
+    const azureOpenAIApiInstanceName = await preferences.get(PLUGIN_NAME, "azureOpenAIApiInstanceName");
+    const azureOpenAIApiKey = await preferences.get(PLUGIN_NAME, "azureOpenAIApiKey");
+    const azureOpenAIApiVersion = await preferences.get(PLUGIN_NAME, "azureOpenAIApiVersion");
+    const azureOpenAIApiDeploymentName = await preferences.get(PLUGIN_NAME, "azureOpenAIApiDeploymentNameRag");
+
+    // Promise.resolve tolerates a non-promise return value from the bridge, so attaching
+    // the rejection handler can never itself throw.
+    Promise.resolve(
+      invokePluginFunc(MODULE_PATH, "ingest", path, customizedEmbedding, {
+        // Empty strings are passed as undefined for these three options.
+        openAIApiKey: openAPIKey?.length > 0 ? openAPIKey : undefined,
+        azureOpenAIApiKey,
+        azureOpenAIApiVersion,
+        azureOpenAIApiInstanceName: azureOpenAIApiInstanceName?.length > 0 ? azureOpenAIApiInstanceName : undefined,
+        azureOpenAIApiDeploymentName,
+        azureOpenAIBasePath: azureOpenAIBasePath?.length > 0 ? azureOpenAIBasePath : undefined,
+      })
+    ).catch((error) => console.error("Document ingestion failed:", error));
+  }
+}
+
+/**
+ * Handles a new message request by answering it from the ingested documents.
+ * Stores an empty placeholder message from user "RAG" and emits it as a new message response,
+ * then invokes `chatWithDocs` in the plugin module with `data.message` as the question.
+ * When the answer arrives, the placeholder's text is filled in and a message response update is emitted.
+ * The answer is not awaited: the returned promise resolves once the request has been dispatched.
+ * A failed request is logged to the console and leaves the placeholder message empty.
+ * @param data - The incoming message request.
+ */
+async function inferenceRequest(data: NewMessageRequest): Promise<any> {
+  // TODO: Hiro - Add support for custom LLM
+  const customLLM = undefined;
+  const message = {
+    ...data,
+    message: "",
+    user: "RAG",
+    createdAt: new Date().toISOString(),
+    _id: undefined,
+  };
+  // Persist the placeholder first so the emitted message carries its store id.
+  const id = await store.insertOne("messages", message);
+  message._id = id;
+  events.emit(EventName.OnNewMessageResponse, message);
+
+  const openAPIKey = await preferences.get(PLUGIN_NAME, "openAIApiKey");
+  const azureOpenAIBasePath = await preferences.get(PLUGIN_NAME, "azureOpenAIBasePath");
+  const azureOpenAIApiInstanceName = await preferences.get(PLUGIN_NAME, "azureOpenAIApiInstanceName");
+  const azureOpenAIApiKey = await preferences.get(PLUGIN_NAME, "azureOpenAIApiKey");
+  const azureOpenAIApiVersion = await preferences.get(PLUGIN_NAME, "azureOpenAIApiVersion");
+  const azureOpenAIApiDeploymentName = await preferences.get(PLUGIN_NAME, "azureOpenAIApiDeploymentNameChat");
+
+  invokePluginFunc(MODULE_PATH, "chatWithDocs", data.message, customLLM, {
+    // Empty strings are passed as undefined for these three options.
+    openAIApiKey: openAPIKey?.length > 0 ? openAPIKey : undefined,
+    azureOpenAIApiKey,
+    azureOpenAIApiVersion,
+    azureOpenAIApiInstanceName: azureOpenAIApiInstanceName?.length > 0 ? azureOpenAIApiInstanceName : undefined,
+    azureOpenAIApiDeploymentName,
+    azureOpenAIBasePath: azureOpenAIBasePath?.length > 0 ? azureOpenAIBasePath : undefined,
+    modelName: "gpt-3.5-turbo-16k",
+    temperature: 0.2,
+  })
+    .then((text) => {
+      console.log("RAG Response:", text);
+      message.message = text;
+
+      events.emit(EventName.OnMessageResponseUpdate, message);
+    })
+    // Without this handler a failed request would surface only as an unhandled rejection.
+    .catch((error) => console.error("RAG request failed:", error));
+}
+/**
+ * Initializes the plugin by registering its extension functions and its preferences
+ * with the given register function.
+ * @param {Function} options.register - The function to use for registering the extension functions
+ */
+export function init({ register }: { register: RegisterExtensionPoint }) {
+  register(PluginService.OnStart, PLUGIN_NAME, onStart);
+  // Re-run ingestion whenever preferences change (e.g. a new document directory or API key).
+  register(PluginService.OnPreferencesUpdate, PLUGIN_NAME, ingest);
+
+  // [preference key, display name, description]; registered in this order, none has a default value.
+  const pluginPreferences: [string, string, string][] = [
+    [
+      "ingestDocumentDirectoryPath",
+      "Document Ingest Directory Path",
+      "The path of the directory containing the documents to ingest",
+    ],
+    ["openAIApiKey", "OpenAI API Key", "OpenAI API Key"],
+    ["azureOpenAIApiKey", "Azure API Key", "Azure Project API Key"],
+    ["azureOpenAIApiVersion", "Azure API Version", "Azure Project API Version"],
+    ["azureOpenAIApiInstanceName", "Azure Instance Name", "Azure Project Instance Name"],
+    [
+      "azureOpenAIApiDeploymentNameChat",
+      "Azure Chat Model Deployment Name",
+      "Azure Project Chat Model Deployment Name (e.g. gpt-3.5-turbo-16k)",
+    ],
+    [
+      "azureOpenAIApiDeploymentNameRag",
+      "Azure Text Embedding Model Deployment Name",
+      "Azure Project Text Embedding Model Deployment Name (e.g. text-embedding-ada-002)",
+    ],
+    ["azureOpenAIBasePath", "Azure Base Path", "Azure Project Base Path"],
+  ];
+
+  for (const [key, name, description] of pluginPreferences) {
+    preferences.registerPreferences<string>(register, PLUGIN_NAME, key, name, description, undefined);
+  }
+}
--- a/plugins/retrieval-plugin/src/module.ts
+++ b/plugins/retrieval-plugin/src/module.ts
@ -0,0 +1,58 @@
+const path = require("path");
+const { app } = require("electron");
+const { DirectoryLoader } = require("langchain/document_loaders/fs/directory");
+const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
+const { PDFLoader } = require("langchain/document_loaders/fs/pdf");
+const { CharacterTextSplitter } = require("langchain/text_splitter");
+const { FaissStore } = require("langchain/vectorstores/faiss");
+const { ChatOpenAI } = require("langchain/chat_models/openai");
+const { RetrievalQAChain } = require("langchain/chains");
+
+// Vector store built by `ingest` and queried by `chatWithDocs`; stays undefined until an ingestion completes.
+var db: any | undefined = undefined;
+
+/**
+ * Ingests documents from the specified directory.
+ * Only `.pdf` files have a loader registered. The loaded documents are split with a
+ * `CharacterTextSplitter`, embedded, and indexed in a Faiss vector store that replaces
+ * the module-level `db`.
+ * If an `embedding` object is not provided, uses OpenAIEmbeddings.
+ * @param docDir - The directory containing the documents to ingest.
+ * @param embedding - An optional object used to generate embeddings for the documents.
+ * @param config - An optional configuration object used to create a new `OpenAIEmbeddings` object.
+ */
+async function ingest(docDir: string, embedding?: any, config?: any) {
+  const loader = new DirectoryLoader(docDir, {
+    // Named `filePath` so it does not shadow the `path` module required at the top of the file.
+    ".pdf": (filePath) => new PDFLoader(filePath),
+  });
+  const docs = await loader.load();
+  const textSplitter = new CharacterTextSplitter();
+  const docsQA = await textSplitter.splitDocuments(docs);
+  const embeddings = embedding ?? new OpenAIEmbeddings({ ...config });
+  // `docsQA` is already resolved above, so it is passed directly.
+  db = await FaissStore.fromDocuments(docsQA, embeddings);
+  console.log("Documents are ingested");
+}
+
+/**
+ * Generates an answer to a given question using the specified `llm` or a new `ChatOpenAI`.
+ * The function builds a `RetrievalQAChain` over the vector store held in the module-level `db`
+ * and runs the question through it.
+ * @param question - The question to generate an answer for.
+ * @param llm - An optional object used to generate the answer.
+ * @param config - An optional configuration object used to create a new `ChatOpenAI` object, can be ignored if llm is specified.
+ * @returns A Promise that resolves with the generated answer.
+ * @throws Error if no documents have been ingested yet (`db` is still undefined).
+ */
+async function chatWithDocs(question: string, llm?: any, config?: any): Promise<any> {
+  // Fail with a clear message instead of a TypeError from `db.asRetriever()` on undefined.
+  if (!db) {
+    throw new Error("No documents have been ingested yet; ingest documents before chatting with them.");
+  }
+  const llm_question_answer =
+    llm ??
+    new ChatOpenAI({
+      // Default temperature; a `temperature` in `config` overrides it.
+      temperature: 0.2,
+      ...config,
+    });
+  const qa = RetrievalQAChain.fromLLM(llm_question_answer, db.asRetriever(), {
+    verbose: true,
+  });
+  return qa.run(question);
+}
+
+// Functions exposed to the plugin entrypoint, which calls them by name through `invokePluginFunc`.
+module.exports = { ingest, chatWithDocs };
--- a/plugins/retrieval-plugin/tsconfig.json
+++ b/plugins/retrieval-plugin/tsconfig.json
@ -0,0 +1,13 @@
+{
+  "compilerOptions": {
+    "target": "es2016",
+    "module": "ES6",
+    "moduleResolution": "node",
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "esModuleInterop": true,
+    "forceConsistentCasingInFileNames": true,
+    "strict": false,
+    "skipLibCheck": true
+  }
+}
--- a/plugins/retrieval-plugin/webpack.config.js
+++ b/plugins/retrieval-plugin/webpack.config.js
@ -0,0 +1,35 @@
+const path = require("path");
+const webpack = require("webpack");
+const packageJson = require("./package.json");
+
+// Webpack build for the plugin entrypoint: bundles src/index.ts into dist/index.js as an ES module.
+module.exports = {
+  // Required for the `library: { type: "module" }` output below.
+  experiments: { outputModule: true },
+  entry: "./src/index.ts",
+  mode: "production",
+  module: {
+    rules: [
+      {
+        // Compile .ts / .tsx sources with ts-loader, skipping anything under node_modules.
+        test: /\.tsx?$/,
+        use: "ts-loader",
+        exclude: /node_modules/,
+      },
+    ],
+  },
+  plugins: [
+    // Replaces the PLUGIN_NAME and MODULE_PATH identifiers in the source with string literals
+    // derived from package.json (`name`, and `name` + "/" + `module`).
+    new webpack.DefinePlugin({
+      PLUGIN_NAME: JSON.stringify(packageJson.name),
+      MODULE_PATH: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
+    }),
+  ],
+  output: {
+    filename: "index.js",
+    path: path.resolve(__dirname, "dist"),
+    library: { type: "module" },
+  },
+  resolve: {
+    extensions: [".ts", ".js"],
+  },
+  optimization: {
+    // Production mode is used, but the emitted bundle is left unminified.
+    minimize: false,
+  },
+};
--- a/web/app/_components/Preferences.tsx
+++ b/web/app/_components/Preferences.tsx
@ -149,6 +149,7 @@ export const Preferences = () => {
    if (timeout) {
      clearTimeout(timeout)
    }
+    if(extensionPoints.get(PluginService.OnPreferencesUpdate))
    timeout = setTimeout(() => execute(PluginService.OnPreferencesUpdate), 100)
  }