feat: import model with gguf metadata extraction

This commit is contained in:
Louis 2024-08-20 17:01:51 +07:00
parent 1f5d504b3f
commit dfd9c54154
4 changed files with 134 additions and 403 deletions

View File

@ -19,21 +19,22 @@
"build:publish": "run-script-os"
},
"devDependencies": {
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"ts-loader": "^9.5.0",
"typescript": "5.3.3",
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-replace": "^5.0.5",
"@rollup/plugin-typescript": "^11.1.6",
"@types/pdf-parse": "^1.1.4",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"rollup": "^2.38.5",
"rollup-plugin-define": "^1.0.1",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0"
"rollup-plugin-typescript2": "^0.36.0",
"run-script-os": "^1.1.6",
"ts-loader": "^9.5.0",
"typescript": "5.3.3"
},
"files": [
"dist/*",
@ -41,8 +42,15 @@
"README.md"
],
"dependencies": {
"@janhq/core": "file:../../core",
"@huggingface/gguf": "^0.0.11",
"@huggingface/jinja": "^0.3.0",
"@janhq/core": "file:../../core",
"hyllama": "^0.2.2",
"python-shell": "^5.0.0"
}
},
"bundleDependencies": [
"hyllama",
"@huggingface/gguf",
"@huggingface/jinja"
]
}

View File

@ -3,7 +3,7 @@ import sourceMaps from 'rollup-plugin-sourcemaps'
import typescript from 'rollup-plugin-typescript2'
import json from '@rollup/plugin-json'
import replace from '@rollup/plugin-replace'
import commonjs from '@rollup/plugin-commonjs'
const settingJson = require('./resources/settings.json')
const packageJson = require('./package.json')
const defaultModelJson = require('./resources/default-model.json')
@ -39,6 +39,39 @@ export default [
browser: true,
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
{
input: `src/node/index.ts`,
output: [
{
file: 'dist/node/index.cjs.js',
format: 'cjs',
sourcemap: true,
inlineDynamicImports: true,
},
],
// Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
external: ['@janhq/core/node'],
watch: {
include: 'src/node/**',
},
plugins: [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: ['.ts', '.js', '.json'],
}),
// Resolve source maps to the original source
sourceMaps(),
],

View File

@ -19,8 +19,6 @@ import {
DownloadRequest,
executeOnMain,
HuggingFaceRepoData,
Quantization,
log,
getFileSize,
AllQuantizations,
ModelEvent,
@ -565,6 +563,19 @@ export default class JanModelExtension extends ModelExtension {
}
const defaultModel = (await this.getDefaultModel()) as Model
const metadata = await executeOnMain(
NODE,
'retrieveGGUFMetadata',
await joinPath([
await getJanDataFolderPath(),
'models',
dirName,
binaryFileName,
])
)
const eos_id = metadata['tokenizer.ggml.eos_token_id']
if (!defaultModel) {
console.error('Unable to find default model')
return
@ -581,8 +592,18 @@ export default class JanModelExtension extends ModelExtension {
filename: binaryFileName,
},
],
parameters: {
...defaultModel.parameters,
stop: [metadata['tokenizer.ggml.tokens'][eos_id] ?? ''],
},
settings: {
...defaultModel.settings,
prompt_template:
metadata.parsed_chat_template ??
defaultModel.settings.prompt_template,
ctx_len:
metadata['llama.context_length'] ?? defaultModel.settings.ctx_len,
ngl: (metadata['llama.block_count'] ?? 32) + 1,
llama_model_path: binaryFileName,
},
created: Date.now(),
@ -657,6 +678,13 @@ export default class JanModelExtension extends ModelExtension {
return
}
const metadata = await executeOnMain(
NODE,
'retrieveGGUFMetadata',
modelBinaryPath
)
const eos_id = metadata['tokenizer.ggml.eos_token_id']
const binaryFileName = await baseName(modelBinaryPath)
const model: Model = {
@ -669,8 +697,19 @@ export default class JanModelExtension extends ModelExtension {
filename: binaryFileName,
},
],
parameters: {
...defaultModel.parameters,
stop: [metadata['tokenizer.ggml.tokens'][eos_id] ?? ''],
},
settings: {
...defaultModel.settings,
prompt_template:
metadata.parsed_chat_template ??
defaultModel.settings.prompt_template,
ctx_len:
metadata['llama.context_length'] ?? defaultModel.settings.ctx_len,
ngl: (metadata['llama.block_count'] ?? 32) + 1,
llama_model_path: binaryFileName,
},
created: Date.now(),
@ -826,218 +865,4 @@ export default class JanModelExtension extends ModelExtension {
importedModels
)
}
/**
 * Lists the GGUF files of a repository that belong to one quantization.
 *
 * @param repoData - Repository data whose `siblings` entries are scanned.
 * @param selectedQuantization - Text that must occur in the file name.
 * @returns The matching file names, in the order they appear in `siblings`.
 */
private getGgufFileList(
  repoData: HuggingFaceRepoData,
  selectedQuantization: Quantization
): string[] {
  const fileNames = repoData.siblings.map((sibling) => sibling.rfilename)
  // Keep only the names that mention the quantization AND are GGUF binaries
  return fileNames.filter(
    (name) => name.includes(selectedQuantization) && name.endsWith('.gguf')
  )
}
/**
 * Builds the list of repository files relevant to a conversion:
 * model weight files first, then vocabulary files, then auxiliary files.
 *
 * @param repoData - Repository data whose `siblings` entries are scanned.
 * @returns File names ordered as model files, vocab files, other files.
 */
private getFileList(repoData: HuggingFaceRepoData): string[] {
  const fileNames = repoData.siblings.map((sibling) => sibling.rfilename)

  // SafeTensors first, if not, then PyTorch
  let modelFiles = fileNames.filter((name) =>
    JanModelExtension._safetensorsRegexs.some((regex) => regex.test(name))
  )
  if (modelFiles.length === 0) {
    modelFiles = fileNames.filter((name) =>
      JanModelExtension._pytorchRegexs.some((regex) => regex.test(name))
    )
  }

  // Known vocabulary files, kept in this fixed order when present in the repo
  const vocabCandidates = ['tokenizer.model', 'vocab.json', 'tokenizer.json']
  const vocabFiles = vocabCandidates.filter((candidate) =>
    fileNames.includes(candidate)
  )

  // Everything else worth keeping: non-vocab JSON plus text/script/tiktoken files
  const otherSuffixes = ['.txt', '.py', '.tiktoken']
  const etcFiles = fileNames.filter((name) => {
    if (name.endsWith('.json') && !vocabFiles.includes(name)) return true
    return otherSuffixes.some((suffix) => name.endsWith(suffix))
  })

  return modelFiles.concat(vocabFiles, etcFiles)
}
/**
 * Resolves the folder of a model inside the Jan data folder.
 *
 * @param repoID - Repository ID; everything after the first `/` is used as the model name.
 * @returns `<jan data folder>/models/<model name>` joined by `joinPath`.
 */
private async getModelDirPath(repoID: string): Promise<string> {
  // Drop the first path segment of the repo ID, keep the rest as the model name
  const [, ...nameSegments] = repoID.split('/')
  const modelName = nameSegments.join('/')
  const dataFolder = await getJanDataFolderPath()
  return joinPath([dataFolder, 'models', modelName])
}
/**
 * Resolves the path of the converted (unquantized) GGUF file of a model.
 *
 * @param repoID - Repository ID; everything after the first `/` is used as the model name.
 * @returns `<model dir>/<model name>.gguf`.
 */
private async getConvertedModelPath(repoID: string): Promise<string> {
  const ggufFileName = `${repoID.split('/').slice(1).join('/')}.gguf`
  const modelDirPath = await this.getModelDirPath(repoID)
  return joinPath([modelDirPath, ggufFileName])
}
/**
 * Resolves the path of the quantized GGUF file of a model.
 *
 * @param repoID - Repository ID; everything after the first `/` is used as the model name.
 * @param quantization - Quantization whose lower-cased name is appended to the file name.
 * @returns `<model dir>/<model name>-<quantization>.gguf`.
 */
private async getQuantizedModelPath(
  repoID: string,
  quantization: Quantization
): Promise<string> {
  const baseModelName = repoID.split('/').slice(1).join('/')
  const quantizedFileName = `${baseModelName}-${quantization.toLowerCase()}.gguf`
  const modelDirPath = await this.getModelDirPath(repoID)
  return joinPath([modelDirPath, quantizedFileName])
}
/**
 * Picks the context length from a model config.
 * The first truthy value among `max_sequence_length`,
 * `max_position_embeddings` and `n_ctx` wins; otherwise 2048 is returned.
 *
 * @param config - Parsed model configuration.
 * @returns The context length to use.
 */
private getCtxLength(config: {
  max_sequence_length?: number
  max_position_embeddings?: number
  n_ctx?: number
}): number {
  return (
    config.max_sequence_length ||
    config.max_position_embeddings ||
    config.n_ctx ||
    2048
  )
}
/**
* Converts a Hugging Face model to GGUF.
*
* Does nothing when the extension has been interrupted or when the converted
* file already exists. The conversion output is first written to
* `<output>.temp` and renamed once a converter has finished, after which the
* remaining source files of the model folder are deleted.
*
* @param repoID - The repo ID of the model to convert.
* @returns A promise that resolves when the conversion is complete.
* @throws Error('Model dir not found') when the model folder does not exist.
*/
async convert(repoID: string): Promise<void> {
if (this.interrupted) return
const modelDirPath = await this.getModelDirPath(repoID)
const modelOutPath = await this.getConvertedModelPath(repoID)
if (!(await fs.existsSync(modelDirPath))) {
throw new Error('Model dir not found')
}
// Already converted: nothing to do
if (await fs.existsSync(modelOutPath)) return
await executeOnMain(NODE, 'installDeps')
// The interrupted flag may have been set while installDeps was running
if (this.interrupted) return
try {
// First attempt: the 'convertHf' handler on the main process
await executeOnMain(
NODE,
'convertHf',
modelDirPath,
modelOutPath + '.temp'
)
} catch (err) {
// Fallback: the 'convert' handler, which takes a context length and a BPE flag
log(`[Conversion]::Debug: Error using hf-to-gguf.py, trying convert.py`)
let ctx = 2048
try {
const config = await fs.readFileSync(
await joinPath([modelDirPath, 'config.json']),
'utf8'
)
const configParsed = JSON.parse(config)
ctx = this.getCtxLength(configParsed)
// Write the resolved context length back into config.json as max_sequence_length
configParsed.max_sequence_length = ctx
await fs.writeFileSync(
await joinPath([modelDirPath, 'config.json']),
JSON.stringify(configParsed, null, 2)
)
} catch (err) {
log(`${err}`)
// ignore missing config.json
}
// BPE vocabulary is requested when the model folder contains vocab.json
const bpe = await fs.existsSync(
await joinPath([modelDirPath, 'vocab.json'])
)
await executeOnMain(
NODE,
'convert',
modelDirPath,
modelOutPath + '.temp',
{
ctx,
bpe,
}
)
}
// Move the finished temp file to its final name
await executeOnMain(
NODE,
'renameSync',
modelOutPath + '.temp',
modelOutPath
)
// Clean up the model folder: keep the converted file and files ending in
// 'config.json' (but not '_config.json'); delete everything else
for (const file of await fs.readdirSync(modelDirPath)) {
if (
modelOutPath.endsWith(file) ||
(file.endsWith('config.json') && !file.endsWith('_config.json'))
)
continue
await fs.unlinkSync(await joinPath([modelDirPath, file]))
}
}
/**
 * Quantizes the converted GGUF file of a model.
 *
 * Does nothing when the extension has been interrupted or when the quantized
 * file already exists. The result is written to `<output>.temp`, renamed to
 * its final name, and the unquantized converted file is then deleted.
 *
 * @param repoID - The repo ID of the model to quantize.
 * @param quantization - The quantization to use.
 * @returns A promise that resolves when the quantization is complete.
 * @throws Error('Model dir not found') when the model folder does not exist.
 */
async quantize(repoID: string, quantization: Quantization): Promise<void> {
  if (this.interrupted) return

  const modelDir = await this.getModelDirPath(repoID)
  const quantizedPath = await this.getQuantizedModelPath(repoID, quantization)

  const modelDirExists = await fs.existsSync(modelDir)
  if (!modelDirExists) {
    throw new Error('Model dir not found')
  }

  const alreadyQuantized = await fs.existsSync(quantizedPath)
  if (alreadyQuantized) return

  // Quantize into a temp file, then move it into place
  const convertedPath = await this.getConvertedModelPath(repoID)
  const tempPath = quantizedPath + '.temp'
  await executeOnMain(NODE, 'quantize', convertedPath, tempPath, quantization)
  await executeOnMain(NODE, 'renameSync', tempPath, quantizedPath)

  // The unquantized conversion result is no longer needed
  const obsoletePath = await this.getConvertedModelPath(repoID)
  await fs.unlinkSync(obsoletePath)
}
/**
 * Cancels the conversion of the current Hugging Face model.
 *
 * Sets the `interrupted` flag (checked by `convert` and `quantize` before they
 * start work), aborts the download of every file of the repository and asks
 * the main process to kill the running conversion/quantization processes.
 *
 * @param repoID - The repository ID to cancel.
 * @param repoData - The repository data to cancel.
 * @returns A promise that resolves once the downloads have been aborted and
 * the kill request has completed.
 */
async cancelConvert(
  repoID: string,
  repoData: HuggingFaceRepoData
): Promise<void> {
  this.interrupted = true

  const modelDirPath = await this.getModelDirPath(repoID)
  for (const file of this.getFileList(repoData)) {
    const localPath = await joinPath([modelDirPath, file])
    await abortDownload(localPath)
  }

  // Awaited so the returned promise only resolves after the processes were
  // killed, and so a failure is reported to the caller instead of becoming an
  // unhandled rejection.
  await executeOnMain(NODE, 'killProcesses')
}
}

View File

@ -1,182 +1,47 @@
import { PythonShell } from 'python-shell'
import { spawn, ChildProcess } from 'child_process'
import { resolve as presolve, join as pjoin } from 'path'
import { log, Quantization } from '@janhq/core/node'
import { statSync } from 'fs'
export { renameSync } from 'fs'
import { closeSync, openSync, readSync } from 'fs'
import { Template } from '@huggingface/jinja'
/**
 * Retrieves the metadata stored at the beginning of a GGUF file.
 * It uses hyllama to parse the metadata and jinja from the @huggingface
 * module to render the embedded chat template.
 *
 * The chat template is rendered with the placeholders `{system_message}` and
 * `{prompt}` as message contents; the result is returned as
 * `parsed_chat_template`. When the file carries no chat template,
 * `parsed_chat_template` is `undefined` and the rest of the metadata is still
 * returned.
 *
 * @param ggufPath - Path of the GGUF file to read.
 * @returns The parsed metadata plus `parsed_chat_template`, or `undefined`
 * when the file cannot be read or parsed (the error is logged).
 */
export const retrieveGGUFMetadata = async (ggufPath: string) => {
  try {
    const { ggufMetadata } = await import('hyllama')

    // Read first 10mb of gguf file
    const headerSize = 10_000_000
    const buffer = new Uint8Array(headerSize)
    const fd = openSync(ggufPath, 'r')
    try {
      readSync(fd, buffer, 0, headerSize, 0)
    } finally {
      // Always release the descriptor, even when the read fails
      closeSync(fd)
    }

    // Parse metadata and tensor info
    const { metadata } = ggufMetadata(buffer.buffer)

    // Token lookups are optional: a file without a token table must not
    // discard the rest of the metadata
    const tokens = metadata['tokenizer.ggml.tokens']
    const eos_token = tokens?.[metadata['tokenizer.ggml.eos_token_id']]
    const bos_token = tokens?.[metadata['tokenizer.ggml.bos_token_id']]

    // Parse jinja template (only when the file provides one)
    const chatTemplate = metadata['tokenizer.chat_template']
    const renderedTemplate =
      typeof chatTemplate === 'string'
        ? new Template(chatTemplate).render({
            add_generation_prompt: true,
            eos_token,
            bos_token,
            messages: [
              {
                role: 'system',
                content: '{system_message}',
              },
              {
                role: 'user',
                content: '{prompt}',
              },
            ],
          })
        : undefined

    return {
      ...metadata,
      parsed_chat_template: renderedTemplate,
    }
  } catch (e) {
    console.log(e)
  }
}

// Handles of the currently running child processes, kept so that
// killProcesses() can terminate them.
let pythonShell: PythonShell | undefined = undefined
let quantizeProcess: ChildProcess | undefined = undefined

/**
 * Returns the size in bytes of the file at `path`.
 */
export const getSize = (path: string): number => statSync(path).size

/**
 * Kills the tracked Python shell and quantize process, if any, and forgets
 * their handles.
 */
export const killProcesses = () => {
  if (pythonShell) {
    pythonShell.kill()
    pythonShell = undefined
  }
  if (quantizeProcess) {
    quantizeProcess.kill()
    quantizeProcess = undefined
  }
}
/**
 * Resolves the path of the bundled `quantize` binary for the current platform.
 *
 * The binary lives in `../bin/<platform folder>` relative to this module:
 * `win` (with `quantize.exe`) on Windows, `mac-universal` on macOS
 * (one build for both Apple Silicon and Intel) and `linux-cpu` otherwise.
 *
 * @returns The path of the quantize executable.
 */
export const getQuantizeExecutable = (): string => {
  const isWindows = process.platform === 'win32'

  let platformFolder: string
  if (isWindows) {
    platformFolder = 'win'
  } else if (process.platform === 'darwin') {
    platformFolder = 'mac-universal'
  } else {
    platformFolder = 'linux-cpu'
  }

  const executableName = isWindows ? 'quantize.exe' : 'quantize'
  return pjoin(__dirname, '..', 'bin', platformFolder, executableName)
}
/**
 * Runs `scripts/install_deps.py` in a Python shell and logs its output.
 *
 * The shell is tracked in `pythonShell` while it runs so that
 * `killProcesses()` can terminate it, the same way `convertHf` does.
 *
 * @returns A promise that resolves when the script exits with code 0 and
 * rejects with the error, or with the non-zero exit code, otherwise.
 */
export const installDeps = (): Promise<void> => {
  return new Promise((resolve, reject) => {
    const _pythonShell = new PythonShell(
      presolve(__dirname, '..', 'scripts', 'install_deps.py')
    )
    // Register the running shell; without this killProcesses() has nothing to kill
    pythonShell = _pythonShell
    _pythonShell.on('message', (message) => {
      log(`[Install Deps]::Debug: ${message}`)
    })
    _pythonShell.on('stderr', (stderr) => {
      log(`[Install Deps]::Error: ${stderr}`)
    })
    _pythonShell.on('error', (err) => {
      pythonShell = undefined
      log(`[Install Deps]::Error: ${err}`)
      reject(err)
    })
    _pythonShell.on('close', () => {
      const exitCode = _pythonShell.exitCode
      pythonShell = undefined
      log(
        `[Install Deps]::Debug: Deps installation exited with code: ${exitCode}`
      )
      if (exitCode === 0) {
        resolve()
      } else {
        reject(exitCode)
      }
    })
  })
}
/**
 * Runs `scripts/convert-hf-to-gguf.py` on a model folder and logs its output.
 * The shell is tracked in `pythonShell` while it runs.
 *
 * @param modelDirPath - Folder of the model to convert (first script argument).
 * @param outPath - Output file, passed to the script as `--outfile`.
 * @returns A promise that resolves when the script exits with code 0 and
 * rejects with the error, or with the non-zero exit code, otherwise.
 */
export const convertHf = async (
  modelDirPath: string,
  outPath: string
): Promise<void> => {
  const scriptPath = presolve(
    __dirname,
    '..',
    'scripts',
    'convert-hf-to-gguf.py'
  )
  const scriptArgs = [modelDirPath, '--outfile', outPath]

  return await new Promise<void>((resolve, reject) => {
    const shell = new PythonShell(scriptPath, { args: scriptArgs })
    pythonShell = shell

    shell.on('message', (message) => log(`[Conversion]::Debug: ${message}`))
    shell.on('stderr', (stderr) => log(`[Conversion]::Error: ${stderr}`))
    shell.on('error', (err) => {
      pythonShell = undefined
      log(`[Conversion]::Error: ${err}`)
      reject(err)
    })
    shell.on('close', () => {
      const exitCode = shell.exitCode
      pythonShell = undefined
      if (exitCode === 0) {
        resolve()
        return
      }
      log(`[Conversion]::Debug: Conversion exited with code: ${exitCode}`)
      reject(exitCode)
    })
  })
}
/**
 * Runs `scripts/convert.py` on a model folder and logs its output.
 *
 * The shell is tracked in `pythonShell` while it runs so that
 * `killProcesses()` can terminate it, the same way `convertHf` does.
 *
 * @param modelDirPath - Folder of the model to convert (first script argument).
 * @param outPath - Output file, passed to the script as `--outfile`.
 * @param options - `ctx`: when truthy, passed as `--ctx <ctx>`;
 * `bpe`: when truthy, adds `--vocab-type bpe`.
 * @returns A promise that resolves when the script exits with code 0 and
 * rejects with the error, or with the non-zero exit code, otherwise.
 */
export const convert = async (
  modelDirPath: string,
  outPath: string,
  { ctx, bpe }: { ctx?: number; bpe?: boolean }
): Promise<void> => {
  const args = [modelDirPath, '--outfile', outPath]
  if (ctx) {
    args.push('--ctx', ctx.toString())
  }
  if (bpe) {
    args.push('--vocab-type', 'bpe')
  }
  return await new Promise<void>((resolve, reject) => {
    const _pythonShell = new PythonShell(
      presolve(__dirname, '..', 'scripts', 'convert.py'),
      {
        args,
      }
    )
    // Register the running shell; without this killProcesses() has nothing to kill
    pythonShell = _pythonShell
    _pythonShell.on('message', (message) => {
      log(`[Conversion]::Debug: ${message}`)
    })
    _pythonShell.on('stderr', (stderr) => {
      log(`[Conversion]::Error: ${stderr}`)
    })
    _pythonShell.on('error', (err) => {
      pythonShell = undefined
      log(`[Conversion]::Error: ${err}`)
      reject(err)
    })
    _pythonShell.on('close', () => {
      const exitCode = _pythonShell.exitCode
      pythonShell = undefined
      if (exitCode !== 0) {
        log(`[Conversion]::Debug: Conversion exited with code: ${exitCode}`)
        reject(exitCode)
      } else {
        resolve()
      }
    })
  })
}
/**
 * Runs the bundled quantize executable on a GGUF file and logs its output.
 * The process is tracked in `quantizeProcess` while it runs so that
 * `killProcesses()` can terminate it.
 *
 * @param modelPath - GGUF file to quantize (first argument of the executable).
 * @param outPath - Output file (second argument of the executable).
 * @param quantization - Quantization to apply (third argument of the executable).
 * @returns A promise that resolves when the process exits with code 0 and
 * rejects with the spawn error, or with the non-zero exit code, otherwise.
 */
export const quantize = async (
  modelPath: string,
  outPath: string,
  quantization: Quantization
): Promise<void> => {
  return await new Promise<void>((resolve, reject) => {
    const quantizeExecutable = getQuantizeExecutable()
    const _quantizeProcess = spawn(quantizeExecutable, [
      modelPath,
      outPath,
      quantization,
    ])
    quantizeProcess = _quantizeProcess
    _quantizeProcess.stdout?.on('data', (data) => {
      log(`[Quantization]::Debug: ${data}`)
    })
    _quantizeProcess.stderr?.on('data', (data) => {
      log(`[Quantization]::Error: ${data}`)
    })
    // Without an 'error' listener a failed spawn (e.g. missing binary) raises
    // an unhandled 'error' event and the promise never settles.
    _quantizeProcess.on('error', (err) => {
      if (quantizeProcess === _quantizeProcess) quantizeProcess = undefined
      log(`[Quantization]::Error: ${err}`)
      reject(err)
    })
    _quantizeProcess.on('close', (code) => {
      // Forget the handle of the finished process
      if (quantizeProcess === _quantizeProcess) quantizeProcess = undefined
      if (code !== 0) {
        log(`[Quantization]::Debug: Quantization exited with code: ${code}`)
        reject(code)
      } else {
        resolve()
      }
    })
  })
}