feat: chat with documents plugin (#417)

* chore: vectordb driver plugin

# Conflicts:
#	plugins/monitoring-plugin/package.json

* chore: add langchain & index documents

* feat: chat with documents plugin

* chore: correct build step

---------

Co-authored-by: namvuong <22463238+vuonghoainam@users.noreply.github.com>
This commit is contained in:
Louis 2023-10-20 21:03:46 +07:00 committed by GitHub
parent 0f5ef6473b
commit d9e32dcc02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 397 additions and 9 deletions

View File

@ -47,15 +47,6 @@ function updateFinishedDownloadAt(_id: string): Promise<any> {
return store.updateMany("models", { _id }, { time: Date.now(), finishDownloadAt: 1 });
}
/**
 * Looks up the models whose download has not been marked as finished.
 *
 * Queries the "models" collection for records whose `finishDownloadAt` is -1 and
 * passes `startDownloadAt: "desc"` as the sort specification.
 *
 * @returns A promise that resolves with whatever the store returns for that query.
 */
function getUnfinishedDownloadModels(): Promise<any> {
  const collection = "models";
  return store.findMany(
    collection,
    { finishDownloadAt: -1 },
    [{ startDownloadAt: "desc" }]
  );
}
/**
* Retrieves all finished models from the database.
*

View File

@ -0,0 +1,2 @@
/**
 * Name of the plugin. Declared here only for the type checker; the value is
 * expected to be injected at build time (e.g. by webpack's DefinePlugin).
 */
declare const PLUGIN_NAME: string;
/**
 * Path of the plugin's module file, passed to `invokePluginFunc`. Declared here
 * only for the type checker; the value is expected to be injected at build time.
 */
declare const MODULE_PATH: string;

View File

@ -0,0 +1,73 @@
# Create a Jan Plugin using Typescript
Use this template to bootstrap the creation of a TypeScript Jan plugin. 🚀
## Create Your Own Plugin
To create your own plugin, you can use this repository as a template! Just follow the instructions below:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your plugin.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will now be a `.tgz` file in your plugin directory.
## Update the Plugin Metadata
The [`package.json`](package.json) file defines metadata about your plugin, such as
plugin name, main entry, description and version.
When you copy this repository, update `package.json` with the name and description of your plugin.
## Update the Plugin Code
The [`src/`](./src/) directory is the heart of your plugin! This contains the
source code that will be run when your plugin extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your plugin code:
- Most Jan Plugin Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { core } from "@janhq/core";
function onStart(): Promise<any> {
return core.invokePluginFunc(MODULE_PATH, "run", 0);
}
```
For more information about the Jan Plugin Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your plugin!

View File

@ -0,0 +1,43 @@
{
"name": "retrieval-plugin",
"version": "1.0.0",
"description": "Retrieval plugin for Jan app",
"icon": "https://raw.githubusercontent.com/tailwindlabs/heroicons/88e98b0c2b458553fbadccddc2d2f878edc0387b/src/20/solid/circle-stack.svg",
"main": "dist/index.js",
"module": "dist/module.js",
"author": "Jan <service@jan.ai>",
"license": "MIT",
"activationPoints": [
"init"
],
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"bundle": "npm pack"
},
"devDependencies": {
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4"
},
"dependencies": {
"@janhq/core": "^0.1.1",
"faiss-node": "^0.5.1",
"install": "^0.13.0",
"langchain": "^0.0.169",
"npm": "^10.2.0",
"pdf-parse": "^1.1.1",
"ts-loader": "^9.5.0"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"pdf-parse",
"langchain",
"faiss-node"
]
}

View File

@ -0,0 +1,172 @@
/**
* The entrypoint for the plugin.
*/
import {
EventName,
NewMessageRequest,
PluginService,
RegisterExtensionPoint,
invokePluginFunc,
events,
preferences,
store,
} from "@janhq/core";
/**
 * Subscribes `inferenceRequest` to the `OnNewMessageRequest` event so that
 * every new message request is forwarded to the retrieval pipeline.
 */
function registerListener(): void {
  const eventName = EventName.OnNewMessageRequest;
  events.on(eventName, inferenceRequest);
}
/**
 * Plugin start hook.
 *
 * Registers the new-message listener and kicks off document ingestion in the
 * background. Ingestion is deliberately not awaited so that start-up is not
 * blocked by it; a failure is logged instead of surfacing as an unhandled
 * promise rejection.
 *
 * @returns A promise that resolves as soon as the listener is registered and
 * ingestion has been started (not when ingestion has finished).
 */
function onStart(): Promise<void> {
  registerListener();
  // Fire-and-forget: attach a rejection handler so a failed ingestion cannot
  // crash the host with an unhandled rejection.
  ingest().catch((err: unknown) => {
    console.error("Failed to ingest documents on start:", err);
  });
  return Promise.resolve();
}
/**
 * Reads the document directory from the plugin preferences and, when one is
 * configured, invokes the module's `ingest` function with that directory and
 * the OpenAI / Azure OpenAI options stored under `PLUGIN_NAME`.
 *
 * Does nothing when no directory is configured.
 *
 * @returns A promise that resolves once ingestion has finished (or was skipped)
 * and rejects if reading the preferences or the ingestion itself fails.
 */
async function ingest() {
  const path = await preferences.get(PLUGIN_NAME, "ingestDocumentDirectoryPath");
  if (!(path && path.length > 0)) {
    return;
  }

  // TODO: Hiro - Add support for custom embeddings
  const customizedEmbedding = undefined;

  // The remaining preferences are independent of each other, so read them in parallel.
  const [
    openAPIKey,
    azureOpenAIApiKey,
    azureOpenAIApiVersion,
    azureOpenAIApiInstanceName,
    azureOpenAIApiDeploymentName,
    azureOpenAIBasePath,
  ] = await Promise.all([
    preferences.get(PLUGIN_NAME, "openAIApiKey"),
    preferences.get(PLUGIN_NAME, "azureOpenAIApiKey"),
    preferences.get(PLUGIN_NAME, "azureOpenAIApiVersion"),
    preferences.get(PLUGIN_NAME, "azureOpenAIApiInstanceName"),
    preferences.get(PLUGIN_NAME, "azureOpenAIApiDeploymentNameRag"),
    preferences.get(PLUGIN_NAME, "azureOpenAIBasePath"),
  ]);

  // Await the call so that completion and failures are visible to the caller
  // instead of being dropped.
  await invokePluginFunc(MODULE_PATH, "ingest", path, customizedEmbedding, {
    // Empty strings are passed as undefined, i.e. treated as "not set".
    openAIApiKey: openAPIKey?.length > 0 ? openAPIKey : undefined,
    azureOpenAIApiKey,
    azureOpenAIApiVersion,
    azureOpenAIApiInstanceName: azureOpenAIApiInstanceName?.length > 0 ? azureOpenAIApiInstanceName : undefined,
    azureOpenAIApiDeploymentName,
    azureOpenAIBasePath: azureOpenAIBasePath?.length > 0 ? azureOpenAIBasePath : undefined,
  });
}
/**
 * Handles a new message request by answering it from the ingested documents.
 *
 * Inserts an empty placeholder message from user "RAG" into the "messages"
 * store and emits it as `OnNewMessageResponse`. It then invokes the module's
 * `chatWithDocs` function with the request text and the OpenAI / Azure OpenAI
 * options read from the preferences, and finally emits
 * `OnMessageResponseUpdate` with the generated answer.
 *
 * This function is used as an event listener, so failures are caught and
 * logged here rather than left as unhandled promise rejections.
 *
 * @param data - The incoming message request; `data.message` is the question.
 * @returns A promise that resolves once the response has been emitted or the
 * failure has been logged.
 */
async function inferenceRequest(data: NewMessageRequest): Promise<any> {
  // TODO: Hiro - Add support for custom LLMs
  const customLLM = undefined;
  const message = {
    ...data,
    message: "",
    user: "RAG",
    createdAt: new Date().toISOString(),
    _id: undefined,
  };

  try {
    const id = await store.insertOne("messages", message);
    message._id = id;
    // Publish the empty placeholder first; it is updated once the answer arrives.
    events.emit(EventName.OnNewMessageResponse, message);

    // The preferences are independent of each other, so read them in parallel.
    const [
      openAPIKey,
      azureOpenAIApiKey,
      azureOpenAIApiVersion,
      azureOpenAIApiInstanceName,
      azureOpenAIApiDeploymentName,
      azureOpenAIBasePath,
    ] = await Promise.all([
      preferences.get(PLUGIN_NAME, "openAIApiKey"),
      preferences.get(PLUGIN_NAME, "azureOpenAIApiKey"),
      preferences.get(PLUGIN_NAME, "azureOpenAIApiVersion"),
      preferences.get(PLUGIN_NAME, "azureOpenAIApiInstanceName"),
      preferences.get(PLUGIN_NAME, "azureOpenAIApiDeploymentNameChat"),
      preferences.get(PLUGIN_NAME, "azureOpenAIBasePath"),
    ]);

    const text = await invokePluginFunc(MODULE_PATH, "chatWithDocs", data.message, customLLM, {
      // Empty strings are passed as undefined, i.e. treated as "not set".
      openAIApiKey: openAPIKey?.length > 0 ? openAPIKey : undefined,
      azureOpenAIApiKey,
      azureOpenAIApiVersion,
      azureOpenAIApiInstanceName: azureOpenAIApiInstanceName?.length > 0 ? azureOpenAIApiInstanceName : undefined,
      azureOpenAIApiDeploymentName,
      azureOpenAIBasePath: azureOpenAIBasePath?.length > 0 ? azureOpenAIBasePath : undefined,
      modelName: "gpt-3.5-turbo-16k",
      temperature: 0.2,
    });

    console.log("RAG Response:", text);
    message.message = text;
    events.emit(EventName.OnMessageResponseUpdate, message);
  } catch (err: unknown) {
    console.error("RAG request failed:", err);
  }
}
/**
 * Initializes the plugin: registers the start and preferences-update handlers
 * and declares every preference the plugin reads.
 *
 * @param options.register - The function used to register extension points.
 */
export function init({ register }: { register: RegisterExtensionPoint }) {
  register(PluginService.OnStart, PLUGIN_NAME, onStart);
  // Re-ingest whenever the preferences change (e.g. a new document directory).
  register(PluginService.OnPreferencesUpdate, PLUGIN_NAME, ingest);

  // All preferences are strings without a default value; listing them in a
  // table keeps key, title and description together and in registration order.
  const preferenceDefinitions: { key: string; title: string; description: string }[] = [
    {
      key: "ingestDocumentDirectoryPath",
      title: "Document Ingest Directory Path",
      description: "The path of the directory containing the documents to ingest",
    },
    {
      key: "openAIApiKey",
      title: "OpenAI API Key",
      description: "OpenAI API Key",
    },
    {
      key: "azureOpenAIApiKey",
      title: "Azure API Key",
      description: "Azure Project API Key",
    },
    {
      key: "azureOpenAIApiVersion",
      title: "Azure API Version",
      description: "Azure Project API Version",
    },
    {
      key: "azureOpenAIApiInstanceName",
      title: "Azure Instance Name",
      description: "Azure Project Instance Name",
    },
    {
      key: "azureOpenAIApiDeploymentNameChat",
      title: "Azure Chat Model Deployment Name",
      description: "Azure Project Chat Model Deployment Name (e.g. gpt-3.5-turbo-16k)",
    },
    {
      key: "azureOpenAIApiDeploymentNameRag",
      title: "Azure Text Embedding Model Deployment Name",
      description: "Azure Project Text Embedding Model Deployment Name (e.g. text-embedding-ada-002)",
    },
    {
      key: "azureOpenAIBasePath",
      title: "Azure Base Path",
      description: "Azure Project Base Path",
    },
  ];

  for (const { key, title, description } of preferenceDefinitions) {
    preferences.registerPreferences<string>(register, PLUGIN_NAME, key, title, description, undefined);
  }
}

View File

@ -0,0 +1,58 @@
const path = require("path");
const { app } = require("electron");
const { DirectoryLoader } = require("langchain/document_loaders/fs/directory");
const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
const { PDFLoader } = require("langchain/document_loaders/fs/pdf");
const { CharacterTextSplitter } = require("langchain/text_splitter");
const { FaissStore } = require("langchain/vectorstores/faiss");
const { ChatOpenAI } = require("langchain/chat_models/openai");
const { RetrievalQAChain } = require("langchain/chains");
// Vector store shared by this module: assigned by `ingest` once the documents
// have been embedded, read by `chatWithDocs`. Stays undefined until an
// ingestion has completed.
var db: any | undefined = undefined;
/**
 * Ingests the PDF documents found in the specified directory.
 *
 * The documents are loaded, split into chunks with `CharacterTextSplitter`,
 * embedded, and stored in a Faiss vector store that is kept in the
 * module-level `db` variable, replacing any previous store.
 * If an `embedding` object is not provided, a new `OpenAIEmbeddings` is used.
 *
 * @param docDir - The directory containing the documents to ingest.
 * @param embedding - An optional object used to generate embeddings for the documents.
 * @param config - An optional configuration object used to create a new `OpenAIEmbeddings` object.
 */
async function ingest(docDir: string, embedding?: any, config?: any) {
  const loader = new DirectoryLoader(docDir, {
    // Named `filePath` so that it does not shadow the `path` module.
    ".pdf": (filePath: string) => new PDFLoader(filePath),
  });
  const docs = await loader.load();
  const textSplitter = new CharacterTextSplitter();
  const docsQA = await textSplitter.splitDocuments(docs);
  const embeddings = embedding ?? new OpenAIEmbeddings({ ...config });
  // `db` is only replaced after the new store has been built successfully.
  db = await FaissStore.fromDocuments(docsQA, embeddings);
  console.log("Documents are ingested");
}
/**
 * Generates an answer to a given question using the specified `llm` or a new `ChatOpenAI`.
 *
 * Builds a `RetrievalQAChain` on top of the vector store created by `ingest`
 * and runs the question through it.
 *
 * @param question - The question to generate an answer for.
 * @param llm - An optional object used to generate the answer.
 * @param config - An optional configuration object used to create a new `ChatOpenAI` object, can be ignored if llm is specified.
 * @returns A Promise that resolves with the generated answer.
 * @throws Error if no documents have been ingested yet.
 */
async function chatWithDocs(question: string, llm?: any, config?: any): Promise<any> {
  // Without this guard the call below would fail with an opaque
  // "cannot read properties of undefined" TypeError.
  if (!db) {
    throw new Error("Documents have not been ingested yet; call ingest() before chatWithDocs()");
  }
  const llm_question_answer =
    llm ??
    new ChatOpenAI({
      temperature: 0.2,
      // Values in `config` override the default temperature above.
      ...config,
    });
  const qa = RetrievalQAChain.fromLLM(llm_question_answer, db.asRetriever(), {
    verbose: true,
  });
  return qa.run(question);
}
// Functions exposed by this module. The plugin entrypoint calls them by name
// ("ingest", "chatWithDocs") through `invokePluginFunc`, so the exported names
// must stay in sync with those strings.
module.exports = {
ingest,
chatWithDocs,
};

View File

@ -0,0 +1,13 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"rootDir": "./src",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true
}
}

View File

@ -0,0 +1,35 @@
const path = require("path");
const webpack = require("webpack");
const packageJson = require("./package.json");
module.exports = {
experiments: { outputModule: true },
entry: "./src/index.ts",
mode: "production",
module: {
rules: [
{
test: /\.tsx?$/,
use: "ts-loader",
exclude: /node_modules/,
},
],
},
plugins: [
new webpack.DefinePlugin({
PLUGIN_NAME: JSON.stringify(packageJson.name),
MODULE_PATH: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
}),
],
output: {
filename: "index.js",
path: path.resolve(__dirname, "dist"),
library: { type: "module" },
},
resolve: {
extensions: [".ts", ".js"],
},
optimization: {
minimize: false,
},
};

View File

@ -149,6 +149,7 @@ export const Preferences = () => {
if (timeout) {
clearTimeout(timeout)
}
if(extensionPoints.get(PluginService.OnPreferencesUpdate))
timeout = setTimeout(() => execute(PluginService.OnPreferencesUpdate), 100)
}