feat: Groq Inference Extension (#2263)

* feat: Groq Inference Extension * Add Groq supported models * Fix folder typo * Add Groq options to interface and new API Key saving, tested working * Fix linting
2024-03-17 17:40:20 -06:00 · 2024-03-17 17:40:20 -06:00 · 0348aa3321
commit 0348aa3321
parent 6c81d83d30
15 changed files with 631 additions and 9 deletions
--- a/core/src/node/helper/config.ts
+++ b/core/src/node/helper/config.ts
@ -118,7 +118,7 @@ const exec = async (command: string): Promise<string> => {
 }
 export const getEngineConfiguration = async (engineId: string) => {
-  if (engineId !== 'openai') {
+  if (engineId !== 'openai' && engineId !== 'groq') {
    return undefined
  }
  const directoryPath = join(getJanDataFolderPath(), 'engines')
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@ -18,6 +18,7 @@ export type ModelInfo = {
 export enum InferenceEngine {
  nitro = 'nitro',
  openai = 'openai',
  groq = 'groq',
  triton_trtllm = 'triton_trtllm',
  nitro_tensorrt_llm = 'nitro-tensorrt-llm',
--- a/extensions/inference-groq-extension/README.md
+++ b/extensions/inference-groq-extension/README.md
@ -0,0 +1,78 @@
 # Jan inference plugin
 Created using Jan app example
 # Create a Jan Plugin using Typescript
 Use this template to bootstrap the creation of a TypeScript Jan plugin. 🚀
 ## Create Your Own Plugin
 To create your own plugin, you can use this repository as a template! Just follow the below instructions:
 1. Click the Use this template button at the top of the repository
 2. Select Create a new repository
 3. Select an owner and name for your new repository
 4. Click Create repository
 5. Clone your new repository
 ## Initial Setup
 After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your plugin.
 > [!NOTE]
 >
 > You'll need to have a reasonably modern version of
 > [Node.js](https://nodejs.org) handy. If you are using a version manager like
 > [`nodenv`](https://github.com/nodenv/nodenv) or
 > [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
 > root of your repository to install the version specified in
 > [`package.json`](./package.json). Otherwise, 20.x or later should work!
 1. :hammer_and_wrench: Install the dependencies
   ```bash
   npm install
   ```
 1. :building_construction: Package the TypeScript for distribution
   ```bash
   npm run bundle
   ```
 1. :white_check_mark: Check your artifact
   There will be a tgz file in your plugin directory now
 ## Update the Plugin Metadata
 The [`package.json`](package.json) file defines metadata about your plugin, such as
 plugin name, main entry, description and version.
 When you copy this repository, update `package.json` with the name, description for your plugin.
 ## Update the Plugin Code
 The [`src/`](./src/) directory is the heart of your plugin! This contains the
 source code that will be run when your plugin extension functions are invoked. You can replace the
 contents of this directory with your own code.
 There are a few things to keep in mind when writing your plugin code:
 - Most Jan Plugin Extension functions are processed asynchronously.
  In `index.ts`, you will see that the extension function will return a `Promise<any>`.
  ```typescript
  import { core } from "@janhq/core";
  function onStart(): Promise<any> {
    return core.invokePluginFunc(MODULE_PATH, "run", 0);
  }
  ```
  For more information about the Jan Plugin Core module, see the
  [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
 So, what are you waiting for? Go ahead and start customizing your plugin!
--- a/extensions/inference-groq-extension/package.json
+++ b/extensions/inference-groq-extension/package.json
@ -0,0 +1,41 @@
 {
  "name": "@janhq/inference-groq-extension",
  "version": "1.0.0",
  "description": "This extension enables fast Groq chat completion API calls",
  "main": "dist/index.js",
  "module": "dist/module.js",
  "author": "Carsen Klock & Jan",
  "license": "AGPL-3.0",
  "scripts": {
    "build": "tsc -b . && webpack --config webpack.config.js",
    "build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../pre-install"
  },
  "exports": {
    ".": "./dist/index.js",
    "./main": "./dist/module.js"
  },
  "devDependencies": {
    "cpx": "^1.5.0",
    "rimraf": "^3.0.2",
    "webpack": "^5.88.2",
    "webpack-cli": "^5.1.4",
    "ts-loader": "^9.5.0"
  },
  "dependencies": {
    "@janhq/core": "file:../../core",
    "fetch-retry": "^5.0.6",
    "path-browserify": "^1.0.1",
    "ulid": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "fetch-retry"
  ]
 }
--- a/extensions/inference-groq-extension/src/@types/global.d.ts
+++ b/extensions/inference-groq-extension/src/@types/global.d.ts
@ -0,0 +1,16 @@
 declare const MODULE: string
 declare const GROQ_DOMAIN: string
 declare interface EngineSettings {
  full_url?: string
  api_key?: string
 }
 enum GroqChatCompletionModelName {
  'mixtral-8x7b-32768' = 'mixtral-8x7b-32768',
  'llama2-70b-4096' = 'llama2-70b-4096',
 }
 declare type GroqModel = Omit<Model, 'id'> & {
  id: GroqChatCompletionModelName
 }
--- a/extensions/inference-groq-extension/src/helpers/sse.ts
+++ b/extensions/inference-groq-extension/src/helpers/sse.ts
@ -0,0 +1,83 @@
 import { ErrorCode } from '@janhq/core'
 import { Observable } from 'rxjs'
 /**
 * Sends a request to the inference server to generate a response based on the recent messages.
 * @param recentMessages - An array of recent messages to use as context for the inference.
 * @param engine - The engine settings to use for the inference.
 * @param model - The model to use for the inference.
 * @returns An Observable that emits the generated response as a string.
 */
 export function requestInference(
  recentMessages: any[],
  engine: EngineSettings,
  model: GroqModel,
  controller?: AbortController
 ): Observable<string> {
  return new Observable((subscriber) => {
    // let model_id: string = model.id
    const requestBody = JSON.stringify({
      messages: recentMessages,
      stream: true,
      model: model.id,
      ...model.parameters,
    })
    fetch(`${engine.full_url}`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Accept': model.parameters.stream
          ? 'text/event-stream'
          : 'application/json',
        'Access-Control-Allow-Origin': '*',
        'Authorization': `Bearer ${engine.api_key}`,
        // 'api-key': `${engine.api_key}`,
      },
      body: requestBody,
      signal: controller?.signal,
    })
      .then(async (response) => {
        if (!response.ok) {
          const data = await response.json()
          const error = {
            message: data.error?.message ?? 'An error occurred.',
            code: data.error?.code ?? ErrorCode.Unknown,
          }
          subscriber.error(error)
          subscriber.complete()
          return
        }
        if (model.parameters.stream === false) {
          const data = await response.json()
          subscriber.next(data.choices[0]?.message?.content ?? '')
        } else {
          const stream = response.body
          const decoder = new TextDecoder('utf-8')
          const reader = stream?.getReader()
          let content = ''
          while (true && reader) {
            const { done, value } = await reader.read()
            if (done) {
              break
            }
            const text = decoder.decode(value)
            const lines = text.trim().split('\n')
            for (const line of lines) {
              if (line.startsWith('data: ') && !line.includes('data: [DONE]')) {
                const data = JSON.parse(line.replace('data: ', ''))
                content += data.choices[0]?.delta?.content ?? ''
                if (content.startsWith('assistant: ')) {
                  content = content.replace('assistant: ', '')
                }
                subscriber.next(content)
              }
            }
          }
        }
        subscriber.complete()
      })
      .catch((err) => subscriber.error(err))
  })
 }
--- a/extensions/inference-groq-extension/src/index.ts
+++ b/extensions/inference-groq-extension/src/index.ts
@ -0,0 +1,224 @@
 /**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module inference-groq-extension/src/index
 */
 import {
  ChatCompletionRole,
  ContentType,
  MessageRequest,
  MessageStatus,
  ThreadContent,
  ThreadMessage,
  events,
  fs,
  InferenceEngine,
  BaseExtension,
  MessageEvent,
  MessageRequestType,
  ModelEvent,
  InferenceEvent,
  AppConfigurationEventName,
  joinPath,
 } from '@janhq/core'
 import { requestInference } from './helpers/sse'
 import { ulid } from 'ulid'
 import { join } from 'path'
 /**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
 export default class JanInferenceGroqExtension extends BaseExtension {
  private static readonly _engineDir = 'file://engines'
  private static readonly _engineMetadataFileName = 'groq.json'
  private static _currentModel: GroqModel
  private static _engineSettings: EngineSettings = {
    full_url: 'https://api.groq.com/openai/v1/chat/completions',
    api_key: 'gsk-<your key here>',
  }
  controller = new AbortController()
  isCancelled = false
  /**
   * Subscribes to events emitted by the @janhq/core package.
   */
  async onLoad() {
    if (!(await fs.existsSync(JanInferenceGroqExtension._engineDir))) {
      await fs
        .mkdirSync(JanInferenceGroqExtension._engineDir)
        .catch((err) => console.debug(err))
    }
    JanInferenceGroqExtension.writeDefaultEngineSettings()
    // Events subscription
    events.on(MessageEvent.OnMessageSent, (data) =>
      JanInferenceGroqExtension.handleMessageRequest(data, this)
    )
    events.on(ModelEvent.OnModelInit, (model: GroqModel) => {
      JanInferenceGroqExtension.handleModelInit(model)
    })
    events.on(ModelEvent.OnModelStop, (model: GroqModel) => {
      JanInferenceGroqExtension.handleModelStop(model)
    })
    events.on(InferenceEvent.OnInferenceStopped, () => {
      JanInferenceGroqExtension.handleInferenceStopped(this)
    })
    const settingsFilePath = await joinPath([
      JanInferenceGroqExtension._engineDir,
      JanInferenceGroqExtension._engineMetadataFileName,
    ])
    events.on(
      AppConfigurationEventName.OnConfigurationUpdate,
      (settingsKey: string) => {
        // Update settings on changes
        if (settingsKey === settingsFilePath)
          JanInferenceGroqExtension.writeDefaultEngineSettings()
      }
    )
  }
  /**
   * Stops the model inference.
   */
  onUnload(): void {}
  static async writeDefaultEngineSettings() {
    try {
      const engineFile = join(
        JanInferenceGroqExtension._engineDir,
        JanInferenceGroqExtension._engineMetadataFileName
      )
      if (await fs.existsSync(engineFile)) {
        const engine = await fs.readFileSync(engineFile, 'utf-8')
        JanInferenceGroqExtension._engineSettings =
          typeof engine === 'object' ? engine : JSON.parse(engine)
      } else {
        await fs.writeFileSync(
          engineFile,
          JSON.stringify(JanInferenceGroqExtension._engineSettings, null, 2)
        )
      }
    } catch (err) {
      console.error(err)
    }
  }
  private static async handleModelInit(model: GroqModel) {
    if (model.engine !== InferenceEngine.groq) {
      return
    } else {
      JanInferenceGroqExtension._currentModel = model
      JanInferenceGroqExtension.writeDefaultEngineSettings()
      // Todo: Check model list with API key
      events.emit(ModelEvent.OnModelReady, model)
    }
  }
  private static async handleModelStop(model: GroqModel) {
    if (model.engine !== 'groq') {
      return
    }
    events.emit(ModelEvent.OnModelStopped, model)
  }
  private static async handleInferenceStopped(
    instance: JanInferenceGroqExtension
  ) {
    instance.isCancelled = true
    instance.controller?.abort()
  }
  /**
   * Handles a new message request by making an inference request and emitting events.
   * Function registered in event manager, should be static to avoid binding issues.
   * Pass instance as a reference.
   * @param {MessageRequest} data - The data for the new message request.
   */
  private static async handleMessageRequest(
    data: MessageRequest,
    instance: JanInferenceGroqExtension
  ) {
    if (data.model.engine !== 'groq') {
      return
    }
    const timestamp = Date.now()
    const message: ThreadMessage = {
      id: ulid(),
      thread_id: data.threadId,
      type: data.type,
      assistant_id: data.assistantId,
      role: ChatCompletionRole.Assistant,
      content: [],
      status: MessageStatus.Pending,
      created: timestamp,
      updated: timestamp,
      object: 'thread.message',
    }
    if (data.type !== MessageRequestType.Summary) {
      events.emit(MessageEvent.OnMessageResponse, message)
    }
    instance.isCancelled = false
    instance.controller = new AbortController()
    requestInference(
      data?.messages ?? [],
      this._engineSettings,
      {
        ...JanInferenceGroqExtension._currentModel,
        parameters: data.model.parameters,
      },
      instance.controller
    ).subscribe({
      next: (content) => {
        const messageContent: ThreadContent = {
          type: ContentType.Text,
          text: {
            value: content.trim(),
            annotations: [],
          },
        }
        message.content = [messageContent]
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      complete: async () => {
        message.status = message.content.length
          ? MessageStatus.Ready
          : MessageStatus.Error
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      error: async (err) => {
        if (instance.isCancelled || message.content.length > 0) {
          message.status = MessageStatus.Stopped
          events.emit(MessageEvent.OnMessageUpdate, message)
          return
        }
        const messageContent: ThreadContent = {
          type: ContentType.Text,
          text: {
            value: 'An error occurred. ' + err.message,
            annotations: [],
          },
        }
        message.content = [messageContent]
        message.status = MessageStatus.Error
        message.error_code = err.code
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
    })
  }
 }
--- a/extensions/inference-groq-extension/tsconfig.json
+++ b/extensions/inference-groq-extension/tsconfig.json
@ -0,0 +1,14 @@
 {
  "compilerOptions": {
    "target": "es2016",
    "module": "ES6",
    "moduleResolution": "node",
    "outDir": "./dist",
    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "strict": false,
    "skipLibCheck": true,
    "rootDir": "./src"
  },
  "include": ["./src"]
 }
--- a/extensions/inference-groq-extension/webpack.config.js
+++ b/extensions/inference-groq-extension/webpack.config.js
@ -0,0 +1,39 @@
 const path = require('path')
 const webpack = require('webpack')
 const packageJson = require('./package.json')
 module.exports = {
  experiments: { outputModule: true },
  entry: './src/index.ts', // Adjust the entry point to match your project's main file
  mode: 'production',
  module: {
    rules: [
      {
        test: /\.tsx?$/,
        use: 'ts-loader',
        exclude: /node_modules/,
      },
    ],
  },
  plugins: [
    new webpack.DefinePlugin({
      MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
      GROQ_DOMAIN: JSON.stringify('api.groq.com'),
    }),
  ],
  output: {
    filename: 'index.js', // Adjust the output file name as needed
    path: path.resolve(__dirname, 'dist'),
    library: { type: 'module' }, // Specify ESM output format
  },
  resolve: {
    extensions: ['.ts', '.js'],
    fallback: {
      path: require.resolve('path-browserify'),
    },
  },
  optimization: {
    minimize: false,
  },
  // Add loaders and other configuration as needed for your project
 }
--- a/models/groq-llama2-70b/model.json
+++ b/models/groq-llama2-70b/model.json
@ -0,0 +1,27 @@
 {
    "sources": [
      {
        "url": "https://groq.com"
      }
    ],
    "id": "llama2-70b-4096",
    "object": "model",
    "name": "Groq Llama 2 70b",
    "version": "1.0",
    "description": "Groq Llama 2 70b with supercharged speed!",
    "format": "api",
    "settings": {},
    "parameters": {
      "max_tokens": 4096,
      "temperature": 0.7,
      "top_p": 1,
      "stop": null,
      "stream": true
    },
    "metadata": {
      "author": "Meta",
      "tags": ["General", "Big Context Length"]
    },
    "engine": "groq"
  }
--- a/models/groq-mixtral-8x7b-instruct/model.json
+++ b/models/groq-mixtral-8x7b-instruct/model.json
@ -0,0 +1,27 @@
 {
    "sources": [
      {
        "url": "https://groq.com"
      }
    ],
    "id": "mixtral-8x7b-32768",
    "object": "model",
    "name": "Groq Mixtral 8x7b Instruct",
    "version": "1.0",
    "description": "Groq Mixtral 8x7b Instruct is Mixtral with supercharged speed!",
    "format": "api",
    "settings": {},
    "parameters": {
      "max_tokens": 4096,
      "temperature": 0.7,
      "top_p": 1,
      "stop": null,
      "stream": true
    },
    "metadata": {
      "author": "Mistral",
      "tags": ["General", "Big Context Length"]
    },
    "engine": "groq"
  }
--- a/web/containers/DropdownListSidebar/index.tsx
+++ b/web/containers/DropdownListSidebar/index.tsx
@ -78,7 +78,9 @@ const DropdownListSidebar = ({
    (model) => model.engine !== InferenceEngine.openai
  )
  const remoteModel = downloadedModels.filter(
-    (model) => model.engine === InferenceEngine.openai
+    (model) =>
      model.engine === InferenceEngine.openai ||
      model.engine === InferenceEngine.groq
  )
  const modelOptions = isTabActive === 0 ? localModel : remoteModel
--- a/web/containers/OpenAiKeyInput/index.tsx
+++ b/web/containers/OpenAiKeyInput/index.tsx
@ -19,16 +19,49 @@ const OpenAiKeyInput: React.FC = () => {
  >(undefined)
  const { readOpenAISettings, saveOpenAISettings } = useEngineSettings()
  const [groqSettings, setGroqSettings] = useState<
    { api_key: string } | undefined
  >(undefined)
  const { readGroqSettings, saveGroqSettings } = useEngineSettings()
  useEffect(() => {
    readOpenAISettings().then((settings) => {
      setOpenAISettings(settings)
    })
  }, [readOpenAISettings])
-  if (!selectedModel || selectedModel.engine !== InferenceEngine.openai) {
+  useEffect(() => {
    readGroqSettings().then((settings) => {
      setGroqSettings(settings)
    })
  }, [readGroqSettings])
  if (
    !selectedModel ||
    (selectedModel.engine !== InferenceEngine.openai &&
      selectedModel.engine !== InferenceEngine.groq)
  ) {
    return null
  }
  const getCurrentApiKey = () => {
    if (selectedModel.engine === InferenceEngine.openai) {
      return openAISettings?.api_key
    } else if (selectedModel.engine === InferenceEngine.groq) {
      return groqSettings?.api_key
    }
    return '' // Default return value
  }
  const handleApiKeyChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const newApiKey = e.target.value
    if (selectedModel.engine === InferenceEngine.openai) {
      saveOpenAISettings({ apiKey: newApiKey })
    } else if (selectedModel.engine === InferenceEngine.groq) {
      saveGroqSettings({ apiKey: newApiKey })
    }
  }
  return (
    <div className="my-4">
      <label
@ -40,11 +73,9 @@ const OpenAiKeyInput: React.FC = () => {
      <Input
        disabled={serverEnabled}
        id="assistant-instructions"
-        placeholder="Enter your API_KEY"
+        placeholder={getCurrentApiKey()}
-        defaultValue={openAISettings?.api_key}
+        defaultValue={getCurrentApiKey()}
-        onChange={(e) => {
+        onChange={handleApiKeyChange}
          saveOpenAISettings({ apiKey: e.target.value })
        }}
      />
    </div>
  )
--- a/web/hooks/useEngineSettings.ts
+++ b/web/hooks/useEngineSettings.ts
@ -36,5 +36,43 @@ export const useEngineSettings = () => {
      settingFilePath
    )
  }
-  return { readOpenAISettings, saveOpenAISettings }
+
  const readGroqSettings = useCallback(async () => {
    if (!(await fs.existsSync(await joinPath(['file://engines', 'groq.json']))))
      return {}
    const settings = await fs.readFileSync(
      await joinPath(['file://engines', 'groq.json']),
      'utf-8'
    )
    if (settings) {
      return typeof settings === 'object' ? settings : JSON.parse(settings)
    }
    return {}
  }, [])
  const saveGroqSettings = async ({
    apiKey,
  }: {
    apiKey: string | undefined
  }) => {
    const settings = await readGroqSettings()
    const settingFilePath = await joinPath(['file://engines', 'groq.json'])
    settings.api_key = apiKey
    await fs.writeFileSync(settingFilePath, JSON.stringify(settings))
    // Sec: Don't attach the settings data to the event
    events.emit(
      AppConfigurationEventName.OnConfigurationUpdate,
      settingFilePath
    )
  }
  return {
    readOpenAISettings,
    saveOpenAISettings,
    readGroqSettings,
    saveGroqSettings,
  }
 }
--- a/web/screens/Settings/Models/Row.tsx
+++ b/web/screens/Settings/Models/Row.tsx
@ -49,6 +49,7 @@ export default function RowModel(props: RowModelProps) {
  const isRemoteModel =
    props.data.engine === InferenceEngine.openai ||
    props.data.engine === InferenceEngine.groq ||
    props.data.engine === InferenceEngine.triton_trtllm
  const onModelActionClick = (modelId: string) => {