From dfd9c5415482d1bf3ddf70621fd4745bd53dd1f1 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 20 Aug 2024 17:01:51 +0700 Subject: [PATCH] feat: import model with gguf metadata extraction --- extensions/model-extension/package.json | 24 +- extensions/model-extension/rollup.config.ts | 35 ++- extensions/model-extension/src/index.ts | 257 +++---------------- extensions/model-extension/src/node/index.ts | 221 ++++------------ 4 files changed, 134 insertions(+), 403 deletions(-) diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json index 6bd8bbe5e..48732bf87 100644 --- a/extensions/model-extension/package.json +++ b/extensions/model-extension/package.json @@ -19,21 +19,22 @@ "build:publish": "run-script-os" }, "devDependencies": { - "cpx": "^1.5.0", - "download-cli": "^1.1.1", - "rimraf": "^3.0.2", - "ts-loader": "^9.5.0", - "typescript": "5.3.3", "@rollup/plugin-commonjs": "^25.0.7", "@rollup/plugin-json": "^6.1.0", "@rollup/plugin-node-resolve": "^15.2.3", "@rollup/plugin-replace": "^5.0.5", "@rollup/plugin-typescript": "^11.1.6", "@types/pdf-parse": "^1.1.4", + "cpx": "^1.5.0", + "download-cli": "^1.1.1", + "rimraf": "^3.0.2", "rollup": "^2.38.5", "rollup-plugin-define": "^1.0.1", "rollup-plugin-sourcemaps": "^0.6.3", - "rollup-plugin-typescript2": "^0.36.0" + "rollup-plugin-typescript2": "^0.36.0", + "run-script-os": "^1.1.6", + "ts-loader": "^9.5.0", + "typescript": "5.3.3" }, "files": [ "dist/*", @@ -41,8 +42,15 @@ "README.md" ], "dependencies": { - "@janhq/core": "file:../../core", "@huggingface/gguf": "^0.0.11", + "@huggingface/jinja": "^0.3.0", + "@janhq/core": "file:../../core", + "hyllama": "^0.2.2", "python-shell": "^5.0.0" - } + }, + "bundleDependencies": [ + "hyllama", + "@huggingface/gguf", + "@huggingface/jinja" + ] } diff --git a/extensions/model-extension/rollup.config.ts b/extensions/model-extension/rollup.config.ts index aa22bd1f6..c3f3acc77 100644 --- a/extensions/model-extension/rollup.config.ts +++ b/extensions/model-extension/rollup.config.ts @@ -3,7 +3,7 @@ import sourceMaps from 'rollup-plugin-sourcemaps' import typescript from 'rollup-plugin-typescript2' import json from '@rollup/plugin-json' import replace from '@rollup/plugin-replace' - +import commonjs from '@rollup/plugin-commonjs' const settingJson = require('./resources/settings.json') const packageJson = require('./package.json') const defaultModelJson = require('./resources/default-model.json') @@ -39,6 +39,39 @@ export default [ browser: true, }), + // Resolve source maps to the original source + sourceMaps(), + ], + }, + { + input: `src/node/index.ts`, + output: [ + { + file: 'dist/node/index.cjs.js', + format: 'cjs', + sourcemap: true, + inlineDynamicImports: true, + }, + ], + // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') + external: ['@janhq/core/node'], + watch: { + include: 'src/node/**', + }, + plugins: [ + // Allow json resolution + json(), + // Compile TypeScript files + typescript({ useTsconfigDeclarationDir: true }), + // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) + commonjs(), + // Allow node_modules resolution, so you can use 'external' to control + // which external modules to include in the bundle + // https://github.com/rollup/rollup-plugin-node-resolve#usage + resolve({ + extensions: ['.ts', '.js', '.json'], + }), + // Resolve source maps to the original source sourceMaps(), ], diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 7561ee6ed..a8977e07e 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -19,8 +19,6 @@ import { DownloadRequest, executeOnMain, HuggingFaceRepoData, - Quantization, - log, getFileSize, AllQuantizations, ModelEvent, @@ -565,6 +563,19 @@ export default class JanModelExtension extends ModelExtension { } const defaultModel = (await this.getDefaultModel()) as Model + const metadata = await executeOnMain( + NODE, + 'retrieveGGUFMetadata', + await joinPath([ + await getJanDataFolderPath(), + 'models', + dirName, + binaryFileName, + ]) + ) + + const eos_id = metadata['tokenizer.ggml.eos_token_id'] + if (!defaultModel) { console.error('Unable to find default model') return @@ -581,8 +592,18 @@ export default class JanModelExtension extends ModelExtension { filename: binaryFileName, }, ], + parameters: { + ...defaultModel.parameters, + stop: [metadata['tokenizer.ggml.tokens'][eos_id] ?? ''], + }, settings: { ...defaultModel.settings, + prompt_template: + metadata.parsed_chat_template ?? + defaultModel.settings.prompt_template, + ctx_len: + metadata['llama.context_length'] ?? defaultModel.settings.ctx_len, + ngl: (metadata['llama.block_count'] ?? 32) + 1, llama_model_path: binaryFileName, }, created: Date.now(), @@ -657,6 +678,13 @@ export default class JanModelExtension extends ModelExtension { return } + const metadata = await executeOnMain( + NODE, + 'retrieveGGUFMetadata', + modelBinaryPath + ) + const eos_id = metadata['tokenizer.ggml.eos_token_id'] + const binaryFileName = await baseName(modelBinaryPath) const model: Model = { @@ -669,8 +697,19 @@ export default class JanModelExtension extends ModelExtension { filename: binaryFileName, }, ], + parameters: { + ...defaultModel.parameters, + stop: [metadata['tokenizer.ggml.tokens'][eos_id] ?? ''], + }, + settings: { ...defaultModel.settings, + prompt_template: + metadata.parsed_chat_template ?? + defaultModel.settings.prompt_template, + ctx_len: + metadata['llama.context_length'] ?? defaultModel.settings.ctx_len, + ngl: (metadata['llama.block_count'] ?? 32) + 1, llama_model_path: binaryFileName, }, created: Date.now(), @@ -826,218 +865,4 @@ export default class JanModelExtension extends ModelExtension { importedModels ) } - - private getGgufFileList( - repoData: HuggingFaceRepoData, - selectedQuantization: Quantization - ): string[] { - return repoData.siblings - .map((file) => file.rfilename) - .filter((file) => file.indexOf(selectedQuantization) !== -1) - .filter((file) => file.endsWith('.gguf')) - } - - private getFileList(repoData: HuggingFaceRepoData): string[] { - // SafeTensors first, if not, then PyTorch - const modelFiles = repoData.siblings - .map((file) => file.rfilename) - .filter((file) => - JanModelExtension._safetensorsRegexs.some((regex) => regex.test(file)) - ) - if (modelFiles.length === 0) { - repoData.siblings.forEach((file) => { - if ( - JanModelExtension._pytorchRegexs.some((regex) => - regex.test(file.rfilename) - ) - ) { - modelFiles.push(file.rfilename) - } - }) - } - - const vocabFiles = [ - 'tokenizer.model', - 'vocab.json', - 'tokenizer.json', - ].filter((file) => - repoData.siblings.some((sibling) => sibling.rfilename === file) - ) - - const etcFiles = repoData.siblings - .map((file) => file.rfilename) - .filter( - (file) => - (file.endsWith('.json') && !vocabFiles.includes(file)) || - file.endsWith('.txt') || - file.endsWith('.py') || - file.endsWith('.tiktoken') - ) - - return [...modelFiles, ...vocabFiles, ...etcFiles] - } - - private async getModelDirPath(repoID: string): Promise { - const modelName = repoID.split('/').slice(1).join('/') - return joinPath([await getJanDataFolderPath(), 'models', modelName]) - } - - private async getConvertedModelPath(repoID: string): Promise { - const modelName = repoID.split('/').slice(1).join('/') - const modelDirPath = await this.getModelDirPath(repoID) - return joinPath([modelDirPath, modelName + '.gguf']) - } - - private async getQuantizedModelPath( - repoID: string, - quantization: Quantization - ): Promise { - const modelName = repoID.split('/').slice(1).join('/') - const modelDirPath = await this.getModelDirPath(repoID) - return joinPath([ - modelDirPath, - modelName + `-${quantization.toLowerCase()}.gguf`, - ]) - } - private getCtxLength(config: { - max_sequence_length?: number - max_position_embeddings?: number - n_ctx?: number - }): number { - if (config.max_sequence_length) return config.max_sequence_length - if (config.max_position_embeddings) return config.max_position_embeddings - if (config.n_ctx) return config.n_ctx - return 2048 - } - - /** - * Converts a Hugging Face model to GGUF. - * @param repoID - The repo ID of the model to convert. - * @returns A promise that resolves when the conversion is complete. - */ - async convert(repoID: string): Promise { - if (this.interrupted) return - const modelDirPath = await this.getModelDirPath(repoID) - const modelOutPath = await this.getConvertedModelPath(repoID) - if (!(await fs.existsSync(modelDirPath))) { - throw new Error('Model dir not found') - } - if (await fs.existsSync(modelOutPath)) return - - await executeOnMain(NODE, 'installDeps') - if (this.interrupted) return - - try { - await executeOnMain( - NODE, - 'convertHf', - modelDirPath, - modelOutPath + '.temp' - ) - } catch (err) { - log(`[Conversion]::Debug: Error using hf-to-gguf.py, trying convert.py`) - - let ctx = 2048 - try { - const config = await fs.readFileSync( - await joinPath([modelDirPath, 'config.json']), - 'utf8' - ) - const configParsed = JSON.parse(config) - ctx = this.getCtxLength(configParsed) - configParsed.max_sequence_length = ctx - await fs.writeFileSync( - await joinPath([modelDirPath, 'config.json']), - JSON.stringify(configParsed, null, 2) - ) - } catch (err) { - log(`${err}`) - // ignore missing config.json - } - - const bpe = await fs.existsSync( - await joinPath([modelDirPath, 'vocab.json']) - ) - - await executeOnMain( - NODE, - 'convert', - modelDirPath, - modelOutPath + '.temp', - { - ctx, - bpe, - } - ) - } - await executeOnMain( - NODE, - 'renameSync', - modelOutPath + '.temp', - modelOutPath - ) - - for (const file of await fs.readdirSync(modelDirPath)) { - if ( - modelOutPath.endsWith(file) || - (file.endsWith('config.json') && !file.endsWith('_config.json')) - ) - continue - await fs.unlinkSync(await joinPath([modelDirPath, file])) - } - } - - /** - * Quantizes a GGUF model. - * @param repoID - The repo ID of the model to quantize. - * @param quantization - The quantization to use. - * @returns A promise that resolves when the quantization is complete. - */ - async quantize(repoID: string, quantization: Quantization): Promise { - if (this.interrupted) return - const modelDirPath = await this.getModelDirPath(repoID) - const modelOutPath = await this.getQuantizedModelPath(repoID, quantization) - if (!(await fs.existsSync(modelDirPath))) { - throw new Error('Model dir not found') - } - if (await fs.existsSync(modelOutPath)) return - - await executeOnMain( - NODE, - 'quantize', - await this.getConvertedModelPath(repoID), - modelOutPath + '.temp', - quantization - ) - await executeOnMain( - NODE, - 'renameSync', - modelOutPath + '.temp', - modelOutPath - ) - - await fs.unlinkSync(await this.getConvertedModelPath(repoID)) - } - - /** - * Cancels the convert of current Hugging Face model. - * @param repoID - The repository ID to cancel. - * @param repoData - The repository data to cancel. - * @returns {Promise} A promise that resolves when the download has been cancelled. - */ - async cancelConvert( - repoID: string, - repoData: HuggingFaceRepoData - ): Promise { - this.interrupted = true - const modelDirPath = await this.getModelDirPath(repoID) - const files = this.getFileList(repoData) - for (const file of files) { - const filePath = file - const localPath = await joinPath([modelDirPath, filePath]) - await abortDownload(localPath) - } - - executeOnMain(NODE, 'killProcesses') - } } diff --git a/extensions/model-extension/src/node/index.ts b/extensions/model-extension/src/node/index.ts index 991548e00..fa54408bf 100644 --- a/extensions/model-extension/src/node/index.ts +++ b/extensions/model-extension/src/node/index.ts @@ -1,182 +1,47 @@ -import { PythonShell } from 'python-shell' -import { spawn, ChildProcess } from 'child_process' -import { resolve as presolve, join as pjoin } from 'path' -import { log, Quantization } from '@janhq/core/node' -import { statSync } from 'fs' -export { renameSync } from 'fs' +import { closeSync, openSync, readSync } from 'fs' +import { Template } from '@huggingface/jinja' +/** + * This is to retrieve the metadata from a GGUF file + * It uses hyllama and jinja from @huggingface module + */ +export const retrieveGGUFMetadata = async (ggufPath: string) => { + try { + const { ggufMetadata } = await import('hyllama') + // Read first 10mb of gguf file + const fd = openSync(ggufPath, 'r') + const buffer = new Uint8Array(10_000_000) + readSync(fd, buffer, 0, 10_000_000, 0) + closeSync(fd) -let pythonShell: PythonShell | undefined = undefined -let quantizeProcess: ChildProcess | undefined = undefined + // Parse metadata and tensor info + const { metadata } = ggufMetadata(buffer.buffer) -export const getSize = (path: string): number => statSync(path).size - -export const killProcesses = () => { - if (pythonShell) { - pythonShell.kill() - pythonShell = undefined - } - if (quantizeProcess) { - quantizeProcess.kill() - quantizeProcess = undefined + const template = new Template(metadata['tokenizer.chat_template']) + const eos_id = metadata['tokenizer.ggml.eos_token_id'] + const bos_id = metadata['tokenizer.ggml.bos_token_id'] + const eos_token = metadata['tokenizer.ggml.tokens'][eos_id] + const bos_token = metadata['tokenizer.ggml.tokens'][bos_id] + // Parse jinja template + const renderedTemplate = template.render({ + add_generation_prompt: true, + eos_token, + bos_token, + messages: [ + { + role: 'system', + content: '{system_message}', + }, + { + role: 'user', + content: '{prompt}', + }, + ], + }) + return { + ...metadata, + parsed_chat_template: renderedTemplate, + } + } catch (e) { + console.log(e) } } - -export const getQuantizeExecutable = (): string => { - let binaryFolder = pjoin(__dirname, '..', 'bin') // Current directory by default - let binaryName = 'quantize' - /** - * The binary folder is different for each platform. - */ - if (process.platform === 'win32') { - binaryFolder = pjoin(binaryFolder, 'win') - binaryName = 'quantize.exe' - } else if (process.platform === 'darwin') { - /** - * For MacOS: mac-universal both Silicon and InteL - */ - binaryFolder = pjoin(binaryFolder, 'mac-universal') - } else { - binaryFolder = pjoin(binaryFolder, 'linux-cpu') - } - return pjoin(binaryFolder, binaryName) -} - -export const installDeps = (): Promise => { - return new Promise((resolve, reject) => { - const _pythonShell = new PythonShell( - presolve(__dirname, '..', 'scripts', 'install_deps.py') - ) - _pythonShell.on('message', (message) => { - log(`[Install Deps]::Debug: ${message}`) - }) - _pythonShell.on('stderr', (stderr) => { - log(`[Install Deps]::Error: ${stderr}`) - }) - _pythonShell.on('error', (err) => { - pythonShell = undefined - log(`[Install Deps]::Error: ${err}`) - reject(err) - }) - _pythonShell.on('close', () => { - const exitCode = _pythonShell.exitCode - pythonShell = undefined - log( - `[Install Deps]::Debug: Deps installation exited with code: ${exitCode}` - ) - exitCode === 0 ? resolve() : reject(exitCode) - }) - }) -} - -export const convertHf = async ( - modelDirPath: string, - outPath: string -): Promise => { - return await new Promise((resolve, reject) => { - const _pythonShell = new PythonShell( - presolve(__dirname, '..', 'scripts', 'convert-hf-to-gguf.py'), - { - args: [modelDirPath, '--outfile', outPath], - } - ) - pythonShell = _pythonShell - _pythonShell.on('message', (message) => { - log(`[Conversion]::Debug: ${message}`) - }) - _pythonShell.on('stderr', (stderr) => { - log(`[Conversion]::Error: ${stderr}`) - }) - _pythonShell.on('error', (err) => { - pythonShell = undefined - log(`[Conversion]::Error: ${err}`) - reject(err) - }) - _pythonShell.on('close', () => { - const exitCode = _pythonShell.exitCode - pythonShell = undefined - if (exitCode !== 0) { - log(`[Conversion]::Debug: Conversion exited with code: ${exitCode}`) - reject(exitCode) - } else { - resolve() - } - }) - }) -} - -export const convert = async ( - modelDirPath: string, - outPath: string, - { ctx, bpe }: { ctx?: number; bpe?: boolean } -): Promise => { - const args = [modelDirPath, '--outfile', outPath] - if (ctx) { - args.push('--ctx') - args.push(ctx.toString()) - } - if (bpe) { - args.push('--vocab-type') - args.push('bpe') - } - return await new Promise((resolve, reject) => { - const _pythonShell = new PythonShell( - presolve(__dirname, '..', 'scripts', 'convert.py'), - { - args, - } - ) - _pythonShell.on('message', (message) => { - log(`[Conversion]::Debug: ${message}`) - }) - _pythonShell.on('stderr', (stderr) => { - log(`[Conversion]::Error: ${stderr}`) - }) - _pythonShell.on('error', (err) => { - pythonShell = undefined - log(`[Conversion]::Error: ${err}`) - reject(err) - }) - _pythonShell.on('close', () => { - const exitCode = _pythonShell.exitCode - pythonShell = undefined - if (exitCode !== 0) { - log(`[Conversion]::Debug: Conversion exited with code: ${exitCode}`) - reject(exitCode) - } else { - resolve() - } - }) - }) -} - -export const quantize = async ( - modelPath: string, - outPath: string, - quantization: Quantization -): Promise => { - return await new Promise((resolve, reject) => { - const quantizeExecutable = getQuantizeExecutable() - const _quantizeProcess = spawn(quantizeExecutable, [ - modelPath, - outPath, - quantization, - ]) - quantizeProcess = _quantizeProcess - - _quantizeProcess.stdout?.on('data', (data) => { - log(`[Quantization]::Debug: ${data}`) - }) - _quantizeProcess.stderr?.on('data', (data) => { - log(`[Quantization]::Error: ${data}`) - }) - - _quantizeProcess.on('close', (code) => { - if (code !== 0) { - log(`[Quantization]::Debug: Quantization exited with code: ${code}`) - reject(code) - } else { - resolve() - } - }) - }) -}