From dfd9c5415482d1bf3ddf70621fd4745bd53dd1f1 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 20 Aug 2024 17:01:51 +0700
Subject: [PATCH] feat: import model with gguf metadata extraction

---
 extensions/model-extension/package.json      |  24 +-
 extensions/model-extension/rollup.config.ts  |  35 ++-
 extensions/model-extension/src/index.ts      | 257 +++----------------
 extensions/model-extension/src/node/index.ts | 221 ++++------------
 4 files changed, 134 insertions(+), 403 deletions(-)

diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json
index 6bd8bbe5e..48732bf87 100644
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@@ -19,21 +19,22 @@
     "build:publish": "run-script-os"
   },
   "devDependencies": {
-    "cpx": "^1.5.0",
-    "download-cli": "^1.1.1",
-    "rimraf": "^3.0.2",
-    "ts-loader": "^9.5.0",
-    "typescript": "5.3.3",
     "@rollup/plugin-commonjs": "^25.0.7",
     "@rollup/plugin-json": "^6.1.0",
     "@rollup/plugin-node-resolve": "^15.2.3",
     "@rollup/plugin-replace": "^5.0.5",
     "@rollup/plugin-typescript": "^11.1.6",
     "@types/pdf-parse": "^1.1.4",
+    "cpx": "^1.5.0",
+    "download-cli": "^1.1.1",
+    "rimraf": "^3.0.2",
     "rollup": "^2.38.5",
     "rollup-plugin-define": "^1.0.1",
     "rollup-plugin-sourcemaps": "^0.6.3",
-    "rollup-plugin-typescript2": "^0.36.0"
+    "rollup-plugin-typescript2": "^0.36.0",
+    "run-script-os": "^1.1.6",
+    "ts-loader": "^9.5.0",
+    "typescript": "5.3.3"
   },
   "files": [
     "dist/*",
@@ -41,8 +42,15 @@
     "README.md"
   ],
   "dependencies": {
-    "@janhq/core": "file:../../core",
     "@huggingface/gguf": "^0.0.11",
+    "@huggingface/jinja": "^0.3.0",
+    "@janhq/core": "file:../../core",
+    "hyllama": "^0.2.2",
     "python-shell": "^5.0.0"
-  }
+  },
+  "bundleDependencies": [
+    "hyllama",
+    "@huggingface/gguf",
+    "@huggingface/jinja"
+  ]
 }
diff --git a/extensions/model-extension/rollup.config.ts b/extensions/model-extension/rollup.config.ts
index aa22bd1f6..c3f3acc77 100644
--- a/extensions/model-extension/rollup.config.ts
+++ b/extensions/model-extension/rollup.config.ts
@@ -3,7 +3,7 @@ import sourceMaps from 'rollup-plugin-sourcemaps'
 import typescript from 'rollup-plugin-typescript2'
 import json from '@rollup/plugin-json'
 import replace from '@rollup/plugin-replace'
-
+import commonjs from '@rollup/plugin-commonjs'
 const settingJson = require('./resources/settings.json')
 const packageJson = require('./package.json')
 const defaultModelJson = require('./resources/default-model.json')
@@ -39,6 +39,39 @@ export default [
         browser: true,
       }),
 
+      // Resolve source maps to the original source
+      sourceMaps(),
+    ],
+  },
+  {
+    input: `src/node/index.ts`,
+    output: [
+      {
+        file: 'dist/node/index.cjs.js',
+        format: 'cjs',
+        sourcemap: true,
+        inlineDynamicImports: true,
+      },
+    ],
+    // List here the external modules you don't want to include in your bundle (e.g. 'lodash')
+    external: ['@janhq/core/node'],
+    watch: {
+      include: 'src/node/**',
+    },
+    plugins: [
+      // Allow json resolution
+      json(),
+      // Compile TypeScript files
+      typescript({ useTsconfigDeclarationDir: true }),
+      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
+      commonjs(),
+      // Allow node_modules resolution, so you can use 'external' to control
+      // which external modules to include in the bundle
+      // https://github.com/rollup/rollup-plugin-node-resolve#usage
+      resolve({
+        extensions: ['.ts', '.js', '.json'],
+      }),
+
       // Resolve source maps to the original source
       sourceMaps(),
     ],
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 7561ee6ed..a8977e07e 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -19,8 +19,6 @@ import {
   DownloadRequest,
   executeOnMain,
   HuggingFaceRepoData,
-  Quantization,
-  log,
   getFileSize,
   AllQuantizations,
   ModelEvent,
@@ -565,6 +563,19 @@ export default class JanModelExtension extends ModelExtension {
     }
 
     const defaultModel = (await this.getDefaultModel()) as Model
+    const metadata = await executeOnMain(
+      NODE,
+      'retrieveGGUFMetadata',
+      await joinPath([
+        await getJanDataFolderPath(),
+        'models',
+        dirName,
+        binaryFileName,
+      ])
+    )
+
+    const eos_id = metadata['tokenizer.ggml.eos_token_id']
+
     if (!defaultModel) {
       console.error('Unable to find default model')
       return
@@ -581,8 +592,18 @@ export default class JanModelExtension extends ModelExtension {
           filename: binaryFileName,
         },
       ],
+      parameters: {
+        ...defaultModel.parameters,
+        stop: [metadata['tokenizer.ggml.tokens'][eos_id] ?? ''],
+      },
       settings: {
         ...defaultModel.settings,
+        prompt_template:
+          metadata.parsed_chat_template ??
+          defaultModel.settings.prompt_template,
+        ctx_len:
+          metadata['llama.context_length'] ?? defaultModel.settings.ctx_len,
+        ngl: (metadata['llama.block_count'] ?? 32) + 1,
         llama_model_path: binaryFileName,
       },
       created: Date.now(),
@@ -657,6 +678,13 @@ export default class JanModelExtension extends ModelExtension {
       return
     }
 
+    const metadata = await executeOnMain(
+      NODE,
+      'retrieveGGUFMetadata',
+      modelBinaryPath
+    )
+    const eos_id = metadata['tokenizer.ggml.eos_token_id']
+
     const binaryFileName = await baseName(modelBinaryPath)
 
     const model: Model = {
@@ -669,8 +697,19 @@ export default class JanModelExtension extends ModelExtension {
           filename: binaryFileName,
         },
       ],
+      parameters: {
+        ...defaultModel.parameters,
+        stop: [metadata['tokenizer.ggml.tokens'][eos_id] ?? ''],
+      },
+
       settings: {
         ...defaultModel.settings,
+        prompt_template:
+          metadata.parsed_chat_template ??
+          defaultModel.settings.prompt_template,
+        ctx_len:
+          metadata['llama.context_length'] ?? defaultModel.settings.ctx_len,
+        ngl: (metadata['llama.block_count'] ?? 32) + 1,
         llama_model_path: binaryFileName,
       },
       created: Date.now(),
@@ -826,218 +865,4 @@ export default class JanModelExtension extends ModelExtension {
       importedModels
     )
   }
-
-  private getGgufFileList(
-    repoData: HuggingFaceRepoData,
-    selectedQuantization: Quantization
-  ): string[] {
-    return repoData.siblings
-      .map((file) => file.rfilename)
-      .filter((file) => file.indexOf(selectedQuantization) !== -1)
-      .filter((file) => file.endsWith('.gguf'))
-  }
-
-  private getFileList(repoData: HuggingFaceRepoData): string[] {
-    // SafeTensors first, if not, then PyTorch
-    const modelFiles = repoData.siblings
-      .map((file) => file.rfilename)
-      .filter((file) =>
-        JanModelExtension._safetensorsRegexs.some((regex) => regex.test(file))
-      )
-    if (modelFiles.length === 0) {
-      repoData.siblings.forEach((file) => {
-        if (
-          JanModelExtension._pytorchRegexs.some((regex) =>
-            regex.test(file.rfilename)
-          )
-        ) {
-          modelFiles.push(file.rfilename)
-        }
-      })
-    }
-
-    const vocabFiles = [
-      'tokenizer.model',
-      'vocab.json',
-      'tokenizer.json',
-    ].filter((file) =>
-      repoData.siblings.some((sibling) => sibling.rfilename === file)
-    )
-
-    const etcFiles = repoData.siblings
-      .map((file) => file.rfilename)
-      .filter(
-        (file) =>
-          (file.endsWith('.json') && !vocabFiles.includes(file)) ||
-          file.endsWith('.txt') ||
-          file.endsWith('.py') ||
-          file.endsWith('.tiktoken')
-      )
-
-    return [...modelFiles, ...vocabFiles, ...etcFiles]
-  }
-
-  private async getModelDirPath(repoID: string): Promise<string> {
-    const modelName = repoID.split('/').slice(1).join('/')
-    return joinPath([await getJanDataFolderPath(), 'models', modelName])
-  }
-
-  private async getConvertedModelPath(repoID: string): Promise<string> {
-    const modelName = repoID.split('/').slice(1).join('/')
-    const modelDirPath = await this.getModelDirPath(repoID)
-    return joinPath([modelDirPath, modelName + '.gguf'])
-  }
-
-  private async getQuantizedModelPath(
-    repoID: string,
-    quantization: Quantization
-  ): Promise<string> {
-    const modelName = repoID.split('/').slice(1).join('/')
-    const modelDirPath = await this.getModelDirPath(repoID)
-    return joinPath([
-      modelDirPath,
-      modelName + `-${quantization.toLowerCase()}.gguf`,
-    ])
-  }
-  private getCtxLength(config: {
-    max_sequence_length?: number
-    max_position_embeddings?: number
-    n_ctx?: number
-  }): number {
-    if (config.max_sequence_length) return config.max_sequence_length
-    if (config.max_position_embeddings) return config.max_position_embeddings
-    if (config.n_ctx) return config.n_ctx
-    return 2048
-  }
-
-  /**
-   * Converts a Hugging Face model to GGUF.
-   * @param repoID - The repo ID of the model to convert.
-   * @returns A promise that resolves when the conversion is complete.
-   */
-  async convert(repoID: string): Promise<void> {
-    if (this.interrupted) return
-    const modelDirPath = await this.getModelDirPath(repoID)
-    const modelOutPath = await this.getConvertedModelPath(repoID)
-    if (!(await fs.existsSync(modelDirPath))) {
-      throw new Error('Model dir not found')
-    }
-    if (await fs.existsSync(modelOutPath)) return
-
-    await executeOnMain(NODE, 'installDeps')
-    if (this.interrupted) return
-
-    try {
-      await executeOnMain(
-        NODE,
-        'convertHf',
-        modelDirPath,
-        modelOutPath + '.temp'
-      )
-    } catch (err) {
-      log(`[Conversion]::Debug: Error using hf-to-gguf.py, trying convert.py`)
-
-      let ctx = 2048
-      try {
-        const config = await fs.readFileSync(
-          await joinPath([modelDirPath, 'config.json']),
-          'utf8'
-        )
-        const configParsed = JSON.parse(config)
-        ctx = this.getCtxLength(configParsed)
-        configParsed.max_sequence_length = ctx
-        await fs.writeFileSync(
-          await joinPath([modelDirPath, 'config.json']),
-          JSON.stringify(configParsed, null, 2)
-        )
-      } catch (err) {
-        log(`${err}`)
-        // ignore missing config.json
-      }
-
-      const bpe = await fs.existsSync(
-        await joinPath([modelDirPath, 'vocab.json'])
-      )
-
-      await executeOnMain(
-        NODE,
-        'convert',
-        modelDirPath,
-        modelOutPath + '.temp',
-        {
-          ctx,
-          bpe,
-        }
-      )
-    }
-    await executeOnMain(
-      NODE,
-      'renameSync',
-      modelOutPath + '.temp',
-      modelOutPath
-    )
-
-    for (const file of await fs.readdirSync(modelDirPath)) {
-      if (
-        modelOutPath.endsWith(file) ||
-        (file.endsWith('config.json') && !file.endsWith('_config.json'))
-      )
-        continue
-      await fs.unlinkSync(await joinPath([modelDirPath, file]))
-    }
-  }
-
-  /**
-   * Quantizes a GGUF model.
-   * @param repoID - The repo ID of the model to quantize.
-   * @param quantization - The quantization to use.
-   * @returns A promise that resolves when the quantization is complete.
-   */
-  async quantize(repoID: string, quantization: Quantization): Promise<void> {
-    if (this.interrupted) return
-    const modelDirPath = await this.getModelDirPath(repoID)
-    const modelOutPath = await this.getQuantizedModelPath(repoID, quantization)
-    if (!(await fs.existsSync(modelDirPath))) {
-      throw new Error('Model dir not found')
-    }
-    if (await fs.existsSync(modelOutPath)) return
-
-    await executeOnMain(
-      NODE,
-      'quantize',
-      await this.getConvertedModelPath(repoID),
-      modelOutPath + '.temp',
-      quantization
-    )
-    await executeOnMain(
-      NODE,
-      'renameSync',
-      modelOutPath + '.temp',
-      modelOutPath
-    )
-
-    await fs.unlinkSync(await this.getConvertedModelPath(repoID))
-  }
-
-  /**
-   * Cancels the convert of current Hugging Face model.
-   * @param repoID - The repository ID to cancel.
-   * @param repoData - The repository data to cancel.
-   * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
-   */
-  async cancelConvert(
-    repoID: string,
-    repoData: HuggingFaceRepoData
-  ): Promise<void> {
-    this.interrupted = true
-    const modelDirPath = await this.getModelDirPath(repoID)
-    const files = this.getFileList(repoData)
-    for (const file of files) {
-      const filePath = file
-      const localPath = await joinPath([modelDirPath, filePath])
-      await abortDownload(localPath)
-    }
-
-    executeOnMain(NODE, 'killProcesses')
-  }
 }
diff --git a/extensions/model-extension/src/node/index.ts b/extensions/model-extension/src/node/index.ts
index 991548e00..fa54408bf 100644
--- a/extensions/model-extension/src/node/index.ts
+++ b/extensions/model-extension/src/node/index.ts
@@ -1,182 +1,77 @@
-import { PythonShell } from 'python-shell'
-import { spawn, ChildProcess } from 'child_process'
-import { resolve as presolve, join as pjoin } from 'path'
-import { log, Quantization } from '@janhq/core/node'
-import { statSync } from 'fs'
-export { renameSync } from 'fs'
+import { closeSync, openSync, readSync } from 'fs'
+import { Template } from '@huggingface/jinja'
+
+/**
+ * Size, in bytes, of the file prefix that is read and handed to the GGUF
+ * metadata parser; retrieveGGUFMetadata never reads past this prefix.
+ */
+const GGUF_HEAD_BYTES = 10_000_000
+
+/**
+ * Retrieves the metadata of a GGUF file, plus its chat template rendered
+ * with the `{system_message}` / `{prompt}` placeholders.
+ *
+ * Parsing is done by hyllama; the template is rendered by @huggingface/jinja.
+ *
+ * @param ggufPath - Path of the GGUF file to inspect.
+ * @returns The parsed metadata with an extra `parsed_chat_template` entry
+ * (left `undefined` when the file carries no chat template or the template
+ * cannot be rendered), or `undefined` when anything else fails (logged).
+ */
+export const retrieveGGUFMetadata = async (ggufPath: string) => {
+  try {
+    const { ggufMetadata } = await import('hyllama')
+    // Read only the head of the gguf file
+    const head = new ArrayBuffer(GGUF_HEAD_BYTES)
+    const fd = openSync(ggufPath, 'r')
+    let bytesRead = 0
+    try {
+      bytesRead = readSync(fd, new Uint8Array(head), 0, GGUF_HEAD_BYTES, 0)
+    } finally {
+      // Release the descriptor even when the read throws
+      closeSync(fd)
+    }
 
-let pythonShell: PythonShell | undefined = undefined
-let quantizeProcess: ChildProcess | undefined = undefined
+    // Parse metadata and tensor info; files shorter than the head size are
+    // trimmed so the parser does not see trailing zero padding
+    const { metadata } = ggufMetadata(
+      bytesRead < GGUF_HEAD_BYTES ? head.slice(0, bytesRead) : head
+    )
 
-export const getSize = (path: string): number => statSync(path).size
-
-export const killProcesses = () => {
-  if (pythonShell) {
-    pythonShell.kill()
-    pythonShell = undefined
-  }
-  if (quantizeProcess) {
-    quantizeProcess.kill()
-    quantizeProcess = undefined
+    const tokens = metadata['tokenizer.ggml.tokens']
+    const chatTemplate = metadata['tokenizer.chat_template']
+    let renderedTemplate: string | undefined
+    if (typeof chatTemplate === 'string' && chatTemplate.length > 0) {
+      try {
+        const eos_id = metadata['tokenizer.ggml.eos_token_id']
+        const bos_id = metadata['tokenizer.ggml.bos_token_id']
+        // Parse jinja template
+        renderedTemplate = new Template(chatTemplate).render({
+          add_generation_prompt: true,
+          eos_token: tokens?.[eos_id],
+          bos_token: tokens?.[bos_id],
+          messages: [
+            {
+              role: 'system',
+              content: '{system_message}',
+            },
+            {
+              role: 'user',
+              content: '{prompt}',
+            },
+          ],
+        })
+      } catch (e) {
+        // A template that fails to render must not discard the metadata
+        console.error('Unable to render GGUF chat template', e)
+      }
+    }
+    return {
+      ...metadata,
+      parsed_chat_template: renderedTemplate,
+    }
+  } catch (e) {
+    console.error(`Unable to retrieve GGUF metadata from ${ggufPath}`, e)
+    return undefined
   }
 }
-
-export const getQuantizeExecutable = (): string => {
-  let binaryFolder = pjoin(__dirname, '..', 'bin') // Current directory by default
-  let binaryName = 'quantize'
-  /**
-   * The binary folder is different for each platform.
-   */
-  if (process.platform === 'win32') {
-    binaryFolder = pjoin(binaryFolder, 'win')
-    binaryName = 'quantize.exe'
-  } else if (process.platform === 'darwin') {
-    /**
-     *  For MacOS: mac-universal both Silicon and InteL
-     */
-    binaryFolder = pjoin(binaryFolder, 'mac-universal')
-  } else {
-    binaryFolder = pjoin(binaryFolder, 'linux-cpu')
-  }
-  return pjoin(binaryFolder, binaryName)
-}
-
-export const installDeps = (): Promise<void> => {
-  return new Promise((resolve, reject) => {
-    const _pythonShell = new PythonShell(
-      presolve(__dirname, '..', 'scripts', 'install_deps.py')
-    )
-    _pythonShell.on('message', (message) => {
-      log(`[Install Deps]::Debug: ${message}`)
-    })
-    _pythonShell.on('stderr', (stderr) => {
-      log(`[Install Deps]::Error: ${stderr}`)
-    })
-    _pythonShell.on('error', (err) => {
-      pythonShell = undefined
-      log(`[Install Deps]::Error: ${err}`)
-      reject(err)
-    })
-    _pythonShell.on('close', () => {
-      const exitCode = _pythonShell.exitCode
-      pythonShell = undefined
-      log(
-        `[Install Deps]::Debug: Deps installation exited with code: ${exitCode}`
-      )
-      exitCode === 0 ? resolve() : reject(exitCode)
-    })
-  })
-}
-
-export const convertHf = async (
-  modelDirPath: string,
-  outPath: string
-): Promise<void> => {
-  return await new Promise<void>((resolve, reject) => {
-    const _pythonShell = new PythonShell(
-      presolve(__dirname, '..', 'scripts', 'convert-hf-to-gguf.py'),
-      {
-        args: [modelDirPath, '--outfile', outPath],
-      }
-    )
-    pythonShell = _pythonShell
-    _pythonShell.on('message', (message) => {
-      log(`[Conversion]::Debug: ${message}`)
-    })
-    _pythonShell.on('stderr', (stderr) => {
-      log(`[Conversion]::Error: ${stderr}`)
-    })
-    _pythonShell.on('error', (err) => {
-      pythonShell = undefined
-      log(`[Conversion]::Error: ${err}`)
-      reject(err)
-    })
-    _pythonShell.on('close', () => {
-      const exitCode = _pythonShell.exitCode
-      pythonShell = undefined
-      if (exitCode !== 0) {
-        log(`[Conversion]::Debug: Conversion exited with code: ${exitCode}`)
-        reject(exitCode)
-      } else {
-        resolve()
-      }
-    })
-  })
-}
-
-export const convert = async (
-  modelDirPath: string,
-  outPath: string,
-  { ctx, bpe }: { ctx?: number; bpe?: boolean }
-): Promise<void> => {
-  const args = [modelDirPath, '--outfile', outPath]
-  if (ctx) {
-    args.push('--ctx')
-    args.push(ctx.toString())
-  }
-  if (bpe) {
-    args.push('--vocab-type')
-    args.push('bpe')
-  }
-  return await new Promise<void>((resolve, reject) => {
-    const _pythonShell = new PythonShell(
-      presolve(__dirname, '..', 'scripts', 'convert.py'),
-      {
-        args,
-      }
-    )
-    _pythonShell.on('message', (message) => {
-      log(`[Conversion]::Debug: ${message}`)
-    })
-    _pythonShell.on('stderr', (stderr) => {
-      log(`[Conversion]::Error: ${stderr}`)
-    })
-    _pythonShell.on('error', (err) => {
-      pythonShell = undefined
-      log(`[Conversion]::Error: ${err}`)
-      reject(err)
-    })
-    _pythonShell.on('close', () => {
-      const exitCode = _pythonShell.exitCode
-      pythonShell = undefined
-      if (exitCode !== 0) {
-        log(`[Conversion]::Debug: Conversion exited with code: ${exitCode}`)
-        reject(exitCode)
-      } else {
-        resolve()
-      }
-    })
-  })
-}
-
-export const quantize = async (
-  modelPath: string,
-  outPath: string,
-  quantization: Quantization
-): Promise<void> => {
-  return await new Promise<void>((resolve, reject) => {
-    const quantizeExecutable = getQuantizeExecutable()
-    const _quantizeProcess = spawn(quantizeExecutable, [
-      modelPath,
-      outPath,
-      quantization,
-    ])
-    quantizeProcess = _quantizeProcess
-
-    _quantizeProcess.stdout?.on('data', (data) => {
-      log(`[Quantization]::Debug: ${data}`)
-    })
-    _quantizeProcess.stderr?.on('data', (data) => {
-      log(`[Quantization]::Error: ${data}`)
-    })
-
-    _quantizeProcess.on('close', (code) => {
-      if (code !== 0) {
-        log(`[Quantization]::Debug: Quantization exited with code: ${code}`)
-        reject(code)
-      } else {
-        resolve()
-      }
-    })
-  })
-}