feat: import model with gguf metadata extraction

2024-08-20 17:01:51 +07:00 · 2024-08-20 17:01:51 +07:00 · dfd9c54154
commit dfd9c54154
parent 1f5d504b3f
4 changed files with 134 additions and 403 deletions
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@ -19,21 +19,22 @@
    "build:publish": "run-script-os"
  },
  "devDependencies": {
    "cpx": "^1.5.0",
    "download-cli": "^1.1.1",
    "rimraf": "^3.0.2",
    "ts-loader": "^9.5.0",
    "typescript": "5.3.3",
    "@rollup/plugin-commonjs": "^25.0.7",
    "@rollup/plugin-json": "^6.1.0",
    "@rollup/plugin-node-resolve": "^15.2.3",
    "@rollup/plugin-replace": "^5.0.5",
    "@rollup/plugin-typescript": "^11.1.6",
    "@types/pdf-parse": "^1.1.4",
    "cpx": "^1.5.0",
    "download-cli": "^1.1.1",
    "rimraf": "^3.0.2",
    "rollup": "^2.38.5",
    "rollup-plugin-define": "^1.0.1",
    "rollup-plugin-sourcemaps": "^0.6.3",
-    "rollup-plugin-typescript2": "^0.36.0"
+    "rollup-plugin-typescript2": "^0.36.0",
    "run-script-os": "^1.1.6",
    "ts-loader": "^9.5.0",
    "typescript": "5.3.3"
  },
  "files": [
    "dist/*",
@ -41,8 +42,15 @@
    "README.md"
  ],
  "dependencies": {
    "@janhq/core": "file:../../core",
    "@huggingface/gguf": "^0.0.11",
    "@huggingface/jinja": "^0.3.0",
    "@janhq/core": "file:../../core",
    "hyllama": "^0.2.2",
    "python-shell": "^5.0.0"
-  }
+  },
  "bundleDependencies": [
    "hyllama",
    "@huggingface/gguf",
    "@huggingface/jinja"
  ]
 }
--- a/extensions/model-extension/rollup.config.ts
+++ b/extensions/model-extension/rollup.config.ts
@ -3,7 +3,7 @@ import sourceMaps from 'rollup-plugin-sourcemaps'
 import typescript from 'rollup-plugin-typescript2'
 import json from '@rollup/plugin-json'
 import replace from '@rollup/plugin-replace'
-
+import commonjs from '@rollup/plugin-commonjs'
 const settingJson = require('./resources/settings.json')
 const packageJson = require('./package.json')
 const defaultModelJson = require('./resources/default-model.json')
@ -39,6 +39,39 @@ export default [
        browser: true,
      }),
      // Resolve source maps to the original source
      sourceMaps(),
    ],
  },
  {
    input: `src/node/index.ts`,
    output: [
      {
        file: 'dist/node/index.cjs.js',
        format: 'cjs',
        sourcemap: true,
        inlineDynamicImports: true,
      },
    ],
    // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
    external: ['@janhq/core/node'],
    watch: {
      include: 'src/node/**',
    },
    plugins: [
      // Allow json resolution
      json(),
      // Compile TypeScript files
      typescript({ useTsconfigDeclarationDir: true }),
      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
      commonjs(),
      // Allow node_modules resolution, so you can use 'external' to control
      // which external modules to include in the bundle
      // https://github.com/rollup/rollup-plugin-node-resolve#usage
      resolve({
        extensions: ['.ts', '.js', '.json'],
      }),
      // Resolve source maps to the original source
      sourceMaps(),
    ],
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@ -19,8 +19,6 @@ import {
  DownloadRequest,
  executeOnMain,
  HuggingFaceRepoData,
  Quantization,
  log,
  getFileSize,
  AllQuantizations,
  ModelEvent,
@ -565,6 +563,19 @@ export default class JanModelExtension extends ModelExtension {
    }
    const defaultModel = (await this.getDefaultModel()) as Model
    const metadata = await executeOnMain(
      NODE,
      'retrieveGGUFMetadata',
      await joinPath([
        await getJanDataFolderPath(),
        'models',
        dirName,
        binaryFileName,
      ])
    )
    const eos_id = metadata['tokenizer.ggml.eos_token_id']
    if (!defaultModel) {
      console.error('Unable to find default model')
      return
@ -581,8 +592,18 @@ export default class JanModelExtension extends ModelExtension {
          filename: binaryFileName,
        },
      ],
      parameters: {
        ...defaultModel.parameters,
        stop: [metadata['tokenizer.ggml.tokens'][eos_id] ?? ''],
      },
      settings: {
        ...defaultModel.settings,
        prompt_template:
          metadata.parsed_chat_template ??
          defaultModel.settings.prompt_template,
        ctx_len:
          metadata['llama.context_length'] ?? defaultModel.settings.ctx_len,
        ngl: (metadata['llama.block_count'] ?? 32) + 1,
        llama_model_path: binaryFileName,
      },
      created: Date.now(),
@ -657,6 +678,13 @@ export default class JanModelExtension extends ModelExtension {
      return
    }
    const metadata = await executeOnMain(
      NODE,
      'retrieveGGUFMetadata',
      modelBinaryPath
    )
    const eos_id = metadata['tokenizer.ggml.eos_token_id']
    const binaryFileName = await baseName(modelBinaryPath)
    const model: Model = {
@ -669,8 +697,19 @@ export default class JanModelExtension extends ModelExtension {
          filename: binaryFileName,
        },
      ],
      parameters: {
        ...defaultModel.parameters,
        stop: [metadata['tokenizer.ggml.tokens'][eos_id] ?? ''],
      },
      settings: {
        ...defaultModel.settings,
        prompt_template:
          metadata.parsed_chat_template ??
          defaultModel.settings.prompt_template,
        ctx_len:
          metadata['llama.context_length'] ?? defaultModel.settings.ctx_len,
        ngl: (metadata['llama.block_count'] ?? 32) + 1,
        llama_model_path: binaryFileName,
      },
      created: Date.now(),
@ -826,218 +865,4 @@ export default class JanModelExtension extends ModelExtension {
      importedModels
    )
  }
  /**
   * Lists the GGUF files of a repository that belong to one quantization.
   * @param repoData - Repository data; each entry of `siblings` carries an `rfilename`.
   * @param selectedQuantization - Quantization name that must occur in the file name.
   * @returns The file names that contain the quantization name and end in
   * `.gguf`, in the order they appear in `siblings`.
   */
  private getGgufFileList(
    repoData: HuggingFaceRepoData,
    selectedQuantization: Quantization
  ): string[] {
    const ggufFiles: string[] = []
    for (const { rfilename } of repoData.siblings) {
      const matchesQuantization = rfilename.includes(selectedQuantization)
      if (matchesQuantization && rfilename.endsWith('.gguf')) {
        ggufFiles.push(rfilename)
      }
    }
    return ggufFiles
  }
  /**
   * Collects the repository files of interest, grouped as model weights,
   * vocabulary files and remaining auxiliary files.
   * @param repoData - Repository data; each entry of `siblings` carries an `rfilename`.
   * @returns Model files first, then vocabulary files, then the other
   * `.json`/`.txt`/`.py`/`.tiktoken` files.
   */
  private getFileList(repoData: HuggingFaceRepoData): string[] {
    const fileNames = repoData.siblings.map((sibling) => sibling.rfilename)

    // SafeTensors first, if not, then PyTorch
    let weightFiles = fileNames.filter((name) =>
      JanModelExtension._safetensorsRegexs.some((pattern) => pattern.test(name))
    )
    if (weightFiles.length === 0) {
      weightFiles = fileNames.filter((name) =>
        JanModelExtension._pytorchRegexs.some((pattern) => pattern.test(name))
      )
    }

    // Only the well-known vocabulary files that the repository really has
    const knownVocabFiles = ['tokenizer.model', 'vocab.json', 'tokenizer.json']
    const vocabFiles = knownVocabFiles.filter((candidate) =>
      fileNames.includes(candidate)
    )

    // Everything else worth fetching; JSON files already listed as vocabulary
    // are left out so they do not appear twice
    const auxiliaryFiles = fileNames.filter((name) => {
      const isExtraJson = name.endsWith('.json') && !vocabFiles.includes(name)
      return (
        isExtraJson ||
        name.endsWith('.txt') ||
        name.endsWith('.py') ||
        name.endsWith('.tiktoken')
      )
    })

    return [...weightFiles, ...vocabFiles, ...auxiliaryFiles]
  }
  /**
   * Resolves the directory of a model inside the Jan data folder.
   * @param repoID - Repo ID; everything after the first `/` is used as the model name.
   * @returns `<data folder>/models/<model name>` joined with `joinPath`.
   */
  private async getModelDirPath(repoID: string): Promise<string> {
    const [, ...nameSegments] = repoID.split('/')
    const dataFolder = await getJanDataFolderPath()
    return joinPath([dataFolder, 'models', nameSegments.join('/')])
  }
  /**
   * Resolves the path of the converted GGUF file of a model.
   * @param repoID - Repo ID; everything after the first `/` is used as the model name.
   * @returns `<model dir>/<model name>.gguf` joined with `joinPath`.
   */
  private async getConvertedModelPath(repoID: string): Promise<string> {
    const [, ...nameSegments] = repoID.split('/')
    const ggufFileName = nameSegments.join('/') + '.gguf'
    const modelDir = await this.getModelDirPath(repoID)
    return joinPath([modelDir, ggufFileName])
  }
  /**
   * Resolves the path of the quantized GGUF file of a model.
   * @param repoID - Repo ID; everything after the first `/` is used as the model name.
   * @param quantization - Quantization whose lower-cased name is appended to the file name.
   * @returns `<model dir>/<model name>-<quantization>.gguf` joined with `joinPath`.
   */
  private async getQuantizedModelPath(
    repoID: string,
    quantization: Quantization
  ): Promise<string> {
    const [, ...nameSegments] = repoID.split('/')
    const quantizedFileName =
      nameSegments.join('/') + `-${quantization.toLowerCase()}.gguf`
    const modelDir = await this.getModelDirPath(repoID)
    return joinPath([modelDir, quantizedFileName])
  }
  /**
   * Picks the context length out of a model config.
   * @param config - Config object; `max_sequence_length`,
   * `max_position_embeddings` and `n_ctx` are consulted in that order.
   * @returns The first of those values that is truthy, otherwise 2048.
   */
  private getCtxLength(config: {
    max_sequence_length?: number
    max_position_embeddings?: number
    n_ctx?: number
  }): number {
    return (
      config.max_sequence_length ||
      config.max_position_embeddings ||
      config.n_ctx ||
      2048
    )
  }
  /**
   * Converts a Hugging Face model to GGUF.
   *
   * The converter writes to the final path plus a `.temp` suffix; the file is
   * renamed once conversion has finished. Afterwards every other entry of the
   * model directory is deleted, except names ending in `config.json` that do
   * not end in `_config.json`.
   *
   * @param repoID - The repo ID of the model to convert.
   * @returns A promise that resolves when the conversion is complete, or
   * early when the conversion was interrupted or the output already exists.
   * @throws Error when the model directory does not exist.
   */
  async convert(repoID: string): Promise<void> {
    if (this.interrupted) return

    const modelDirPath = await this.getModelDirPath(repoID)
    const modelOutPath = await this.getConvertedModelPath(repoID)
    const tempOutPath = modelOutPath + '.temp'

    const modelDirExists = await fs.existsSync(modelDirPath)
    if (!modelDirExists) {
      throw new Error('Model dir not found')
    }
    const alreadyConverted = await fs.existsSync(modelOutPath)
    if (alreadyConverted) return

    await executeOnMain(NODE, 'installDeps')
    if (this.interrupted) return

    try {
      await executeOnMain(NODE, 'convertHf', modelDirPath, tempOutPath)
    } catch (hfError) {
      // Any convertHf failure falls back to the `convert` handler
      log(`[Conversion]::Debug: Error using hf-to-gguf.py, trying convert.py`)

      let ctx = 2048
      try {
        // Write the resolved context length back into config.json
        const configPath = await joinPath([modelDirPath, 'config.json'])
        const rawConfig = await fs.readFileSync(configPath, 'utf8')
        const parsedConfig = JSON.parse(rawConfig)
        ctx = this.getCtxLength(parsedConfig)
        parsedConfig.max_sequence_length = ctx
        await fs.writeFileSync(
          configPath,
          JSON.stringify(parsedConfig, null, 2)
        )
      } catch (configError) {
        // A missing or unusable config.json is tolerated; ctx keeps whatever
        // value it had when the error occurred
        log(`${configError}`)
      }

      // The presence of vocab.json selects the BPE vocabulary type
      const vocabPath = await joinPath([modelDirPath, 'vocab.json'])
      const bpe = await fs.existsSync(vocabPath)
      await executeOnMain(NODE, 'convert', modelDirPath, tempOutPath, {
        ctx,
        bpe,
      })
    }

    await executeOnMain(NODE, 'renameSync', tempOutPath, modelOutPath)

    // Remove the source files, keeping the converted output and config.json
    const dirEntries = await fs.readdirSync(modelDirPath)
    for (const entry of dirEntries) {
      const isConvertedOutput = modelOutPath.endsWith(entry)
      const isModelConfig =
        entry.endsWith('config.json') && !entry.endsWith('_config.json')
      if (!isConvertedOutput && !isModelConfig) {
        await fs.unlinkSync(await joinPath([modelDirPath, entry]))
      }
    }
  }
  /**
   * Quantizes a GGUF model.
   *
   * The quantized file is written to the final path plus a `.temp` suffix and
   * renamed when done; the unquantized converted file is deleted afterwards.
   *
   * @param repoID - The repo ID of the model to quantize.
   * @param quantization - The quantization to use.
   * @returns A promise that resolves when the quantization is complete, or
   * early when interrupted or when the quantized file already exists.
   * @throws Error when the model directory does not exist.
   */
  async quantize(repoID: string, quantization: Quantization): Promise<void> {
    if (this.interrupted) return

    const modelDirPath = await this.getModelDirPath(repoID)
    const modelOutPath = await this.getQuantizedModelPath(repoID, quantization)

    const modelDirExists = await fs.existsSync(modelDirPath)
    if (!modelDirExists) {
      throw new Error('Model dir not found')
    }
    const alreadyQuantized = await fs.existsSync(modelOutPath)
    if (alreadyQuantized) return

    const convertedModelPath = await this.getConvertedModelPath(repoID)
    const tempOutPath = modelOutPath + '.temp'

    await executeOnMain(
      NODE,
      'quantize',
      convertedModelPath,
      tempOutPath,
      quantization
    )
    await executeOnMain(NODE, 'renameSync', tempOutPath, modelOutPath)

    // The unquantized intermediate file is no longer needed
    await fs.unlinkSync(convertedModelPath)
  }
  /**
   * Cancels the convert of current Hugging Face model.
   *
   * Sets the `interrupted` flag, aborts the download of every file returned
   * by `getFileList`, then requests `killProcesses` on the main process.
   *
   * @param repoID - The repository ID to cancel.
   * @param repoData - The repository data to cancel.
   * @returns A promise that resolves once all downloads have been aborted.
   */
  async cancelConvert(
    repoID: string,
    repoData: HuggingFaceRepoData
  ): Promise<void> {
    this.interrupted = true

    const modelDirPath = await this.getModelDirPath(repoID)
    for (const fileName of this.getFileList(repoData)) {
      const localPath = await joinPath([modelDirPath, fileName])
      await abortDownload(localPath)
    }

    // The kill request is sent without waiting for its result
    executeOnMain(NODE, 'killProcesses')
  }
 }
--- a/extensions/model-extension/src/node/index.ts
+++ b/extensions/model-extension/src/node/index.ts
@ -1,182 +1,47 @@
-import { PythonShell } from 'python-shell'
+import { closeSync, openSync, readSync } from 'fs'
-import { spawn, ChildProcess } from 'child_process'
+import { Template } from '@huggingface/jinja'
-import { resolve as presolve, join as pjoin } from 'path'
+/**
-import { log, Quantization } from '@janhq/core/node'
+ * This is to retrieve the metadata from a GGUF file
-import { statSync } from 'fs'
+ * It uses hyllama and jinja from @huggingface module
-export { renameSync } from 'fs'
+ */
 /**
  * Reads the metadata of a GGUF file and renders its chat template.
  *
  * Only the first 10 MB of the file are read. The chat template is rendered
  * with placeholder messages (`{system_message}` and `{prompt}`) so the result
  * can be used as a prompt template.
  *
  * @param ggufPath - Path of the GGUF file to inspect.
  * @returns The parsed metadata plus `parsed_chat_template`, which is
  * `undefined` when the file has no string chat template or the template
  * cannot be rendered. Resolves to `undefined` when the file cannot be read
  * or parsed; the error is logged.
  */
 export const retrieveGGUFMetadata = async (ggufPath: string) => {
  try {
    const { ggufMetadata } = await import('hyllama')
    // Read first 10mb of gguf file
    const buffer = new Uint8Array(10_000_000)
    const fd = openSync(ggufPath, 'r')
    try {
      readSync(fd, buffer, 0, buffer.length, 0)
    } finally {
      // Release the descriptor even when the read throws
      closeSync(fd)
    }

    // Parse metadata and tensor info
    const { metadata } = ggufMetadata(buffer.buffer)

    // The token table may be absent; optional chaining keeps the lookup from
    // throwing and leaves the tokens undefined instead
    const tokens = metadata['tokenizer.ggml.tokens']
    const eos_token = tokens?.[metadata['tokenizer.ggml.eos_token_id']]
    const bos_token = tokens?.[metadata['tokenizer.ggml.bos_token_id']]

    // Parse jinja template. A missing or unrenderable template must not
    // discard the rest of the metadata, so failures are logged and the
    // rendered template is simply left undefined.
    const chatTemplate = metadata['tokenizer.chat_template']
    let parsed_chat_template: string | undefined
    if (typeof chatTemplate === 'string') {
      try {
        parsed_chat_template = new Template(chatTemplate).render({
          add_generation_prompt: true,
          eos_token,
          bos_token,
          messages: [
            {
              role: 'system',
              content: '{system_message}',
            },
            {
              role: 'user',
              content: '{prompt}',
            },
          ],
        })
      } catch (e) {
        console.log(e)
      }
    }

    return {
      ...metadata,
      parsed_chat_template,
    }
  } catch (e) {
    console.log(e)
  }
 }
 /**
  * Resolves the path of the bundled `quantize` binary.
  *
  * The binary lives under `../bin` relative to this module, in a folder that
  * differs per platform: `win` (with a `.exe` suffix) on Windows,
  * `mac-universal` on macOS (one build for both Apple Silicon and Intel) and
  * `linux-cpu` everywhere else.
  *
  * @returns The path of the quantize executable for the current platform.
  */
 export const getQuantizeExecutable = (): string => {
  const binRoot = pjoin(__dirname, '..', 'bin')
  switch (process.platform) {
    case 'win32':
      return pjoin(binRoot, 'win', 'quantize.exe')
    case 'darwin':
      return pjoin(binRoot, 'mac-universal', 'quantize')
    default:
      return pjoin(binRoot, 'linux-cpu', 'quantize')
  }
 }
 /**
  * Runs `scripts/install_deps.py` through PythonShell.
  *
  * Messages and stderr output of the script are forwarded to `log`.
  *
  * @returns A promise that resolves when the script exits with code 0. It
  * rejects with the error on a shell error, or with the exit code when the
  * script exits with any other code.
  */
 export const installDeps = (): Promise<void> => {
  return new Promise((resolve, reject) => {
    const _pythonShell = new PythonShell(
      presolve(__dirname, '..', 'scripts', 'install_deps.py')
    )
    // Track the running shell in the module-level handle, as convertHf does,
    // so that killProcesses can stop it; the handlers below already clear it
    pythonShell = _pythonShell
    _pythonShell.on('message', (message) => {
      log(`[Install Deps]::Debug: ${message}`)
    })
    _pythonShell.on('stderr', (stderr) => {
      log(`[Install Deps]::Error: ${stderr}`)
    })
    _pythonShell.on('error', (err) => {
      pythonShell = undefined
      log(`[Install Deps]::Error: ${err}`)
      reject(err)
    })
    _pythonShell.on('close', () => {
      const exitCode = _pythonShell.exitCode
      pythonShell = undefined
      log(
        `[Install Deps]::Debug: Deps installation exited with code: ${exitCode}`
      )
      if (exitCode === 0) {
        resolve()
      } else {
        reject(exitCode)
      }
    })
  })
 }
 /**
  * Runs `scripts/convert-hf-to-gguf.py` through PythonShell.
  *
  * The running shell is stored in the module-level `pythonShell` handle and
  * cleared again on error or close. Messages and stderr output of the script
  * are forwarded to `log`.
  *
  * @param modelDirPath - Model directory passed to the script as first argument.
  * @param outPath - Output file passed to the script via `--outfile`.
  * @returns A promise that resolves when the script exits with code 0. It
  * rejects with the error on a shell error, or with the exit code otherwise.
  */
 export const convertHf = async (
  modelDirPath: string,
  outPath: string
 ): Promise<void> => {
  const scriptPath = presolve(
    __dirname,
    '..',
    'scripts',
    'convert-hf-to-gguf.py'
  )
  const scriptArgs = [modelDirPath, '--outfile', outPath]

  return await new Promise<void>((resolve, reject) => {
    const shell = new PythonShell(scriptPath, { args: scriptArgs })
    pythonShell = shell

    shell.on('message', (message) => {
      log(`[Conversion]::Debug: ${message}`)
    })
    shell.on('stderr', (stderr) => {
      log(`[Conversion]::Error: ${stderr}`)
    })
    shell.on('error', (err) => {
      pythonShell = undefined
      log(`[Conversion]::Error: ${err}`)
      reject(err)
    })
    shell.on('close', () => {
      const exitCode = shell.exitCode
      pythonShell = undefined
      if (exitCode === 0) {
        resolve()
        return
      }
      log(`[Conversion]::Debug: Conversion exited with code: ${exitCode}`)
      reject(exitCode)
    })
  })
 }
 /**
  * Runs `scripts/convert.py` through PythonShell.
  *
  * Messages and stderr output of the script are forwarded to `log`.
  *
  * @param modelDirPath - Model directory passed to the script as first argument.
  * @param outPath - Output file passed to the script via `--outfile`.
  * @param options - `ctx` adds `--ctx <ctx>` when truthy; `bpe` adds
  * `--vocab-type bpe` when truthy.
  * @returns A promise that resolves when the script exits with code 0. It
  * rejects with the error on a shell error, or with the exit code otherwise.
  */
 export const convert = async (
  modelDirPath: string,
  outPath: string,
  { ctx, bpe }: { ctx?: number; bpe?: boolean }
 ): Promise<void> => {
  const args = [modelDirPath, '--outfile', outPath]
  if (ctx) {
    args.push('--ctx', ctx.toString())
  }
  if (bpe) {
    args.push('--vocab-type', 'bpe')
  }
  return await new Promise<void>((resolve, reject) => {
    const _pythonShell = new PythonShell(
      presolve(__dirname, '..', 'scripts', 'convert.py'),
      {
        args,
      }
    )
    // Track the running shell in the module-level handle, as convertHf does,
    // so that killProcesses can stop it; the handlers below already clear it
    pythonShell = _pythonShell
    _pythonShell.on('message', (message) => {
      log(`[Conversion]::Debug: ${message}`)
    })
    _pythonShell.on('stderr', (stderr) => {
      log(`[Conversion]::Error: ${stderr}`)
    })
    _pythonShell.on('error', (err) => {
      pythonShell = undefined
      log(`[Conversion]::Error: ${err}`)
      reject(err)
    })
    _pythonShell.on('close', () => {
      const exitCode = _pythonShell.exitCode
      pythonShell = undefined
      if (exitCode !== 0) {
        log(`[Conversion]::Debug: Conversion exited with code: ${exitCode}`)
        reject(exitCode)
      } else {
        resolve()
      }
    })
  })
 }
 /**
  * Runs the bundled quantize executable on a GGUF model.
  *
  * The child process is stored in the module-level `quantizeProcess` handle
  * while it runs. Its stdout and stderr are forwarded to `log`.
  *
  * @param modelPath - Input model, passed as first argument.
  * @param outPath - Output file, passed as second argument.
  * @param quantization - Quantization, passed as third argument.
  * @returns A promise that resolves when the process closes with code 0. It
  * rejects with the error when the process cannot be spawned or fails, or
  * with the exit code when it closes with any other code.
  */
 export const quantize = async (
  modelPath: string,
  outPath: string,
  quantization: Quantization
 ): Promise<void> => {
  return await new Promise<void>((resolve, reject) => {
    const quantizeExecutable = getQuantizeExecutable()
    const _quantizeProcess = spawn(quantizeExecutable, [
      modelPath,
      outPath,
      quantization,
    ])
    quantizeProcess = _quantizeProcess

    // Clear the module-level handle, but only if it still refers to this run
    const releaseHandle = () => {
      if (quantizeProcess === _quantizeProcess) quantizeProcess = undefined
    }

    _quantizeProcess.stdout?.on('data', (data) => {
      log(`[Quantization]::Debug: ${data}`)
    })
    _quantizeProcess.stderr?.on('data', (data) => {
      log(`[Quantization]::Error: ${data}`)
    })
    // Without an 'error' listener a spawn failure (e.g. missing binary) is an
    // unhandled 'error' event instead of a rejection of this promise
    _quantizeProcess.on('error', (err) => {
      releaseHandle()
      log(`[Quantization]::Error: ${err}`)
      reject(err)
    })
    _quantizeProcess.on('close', (code) => {
      releaseHandle()
      if (code !== 0) {
        log(`[Quantization]::Debug: Quantization exited with code: ${code}`)
        reject(code)
      } else {
        resolve()
      }
    })
  })
 }