Merge pull request #1888 from janhq/dev

Release cut 0.4.6
This commit is contained in:
Louis 2024-02-02 01:34:57 +07:00 committed by GitHub
commit 36ad16ff4e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
183 changed files with 4834 additions and 2343 deletions

View File

@ -0,0 +1,4 @@
{
"name": "jan",
"image": "node:20"
}

View File

@ -8,7 +8,7 @@ on:
- 'README.md'
- 'docs/**'
schedule:
- cron: '0 20 * * 2,3,4' # At 8 PM UTC on Tuesday, Wednesday, and Thursday, which is 3 AM UTC+7
- cron: '0 20 * * 1,2,3' # At 8 PM UTC on Monday, Tuesday, and Wednesday, which is 3 AM UTC+7 on Tuesday, Wednesday, and Thursday
workflow_dispatch:
inputs:
public_provider:

View File

@ -98,8 +98,8 @@ jobs:
make build-and-publish
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANALYTICS_ID: ${{ secrets.JAN_APP_POSTHOG_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_POSTHOG_URL }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
- name: Upload Artifact .deb file
if: inputs.public_provider != 'github'

View File

@ -137,8 +137,8 @@ jobs:
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APP_PATH: "."
DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
ANALYTICS_ID: ${{ secrets.JAN_APP_POSTHOG_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_POSTHOG_URL }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
- name: Upload Artifact
if: inputs.public_provider != 'github'

View File

@ -127,8 +127,8 @@ jobs:
make build-and-publish
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANALYTICS_ID: ${{ secrets.JAN_APP_POSTHOG_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_POSTHOG_URL }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
AZURE_KEY_VAULT_URI: ${{ secrets.AZURE_KEY_VAULT_URI }}
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}

View File

@ -17,7 +17,7 @@ jobs:
with:
fetch-depth: "0"
token: ${{ secrets.PAT_SERVICE_ACCOUNT }}
ref: main
ref: dev
- name: Get Latest Release
uses: pozetroninc/github-action-get-latest-release@v0.7.0
@ -46,4 +46,4 @@ jobs:
git config --global user.name "Service Account"
git add README.md
git commit -m "Update README.md with Stable Download URLs"
git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:main
git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:dev

.gitignore (vendored, 3 lines changed)
View File

@ -12,6 +12,8 @@ build
electron/renderer
electron/models
electron/docs
electron/engines
server/pre-install
package-lock.json
*.log
@ -26,3 +28,4 @@ extensions/inference-nitro-extension/bin/*/*.exp
extensions/inference-nitro-extension/bin/*/*.lib
extensions/inference-nitro-extension/bin/saved-*
extensions/inference-nitro-extension/bin/*.tar.gz

View File

@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
<tr style="text-align:center">
<td style="text-align:center"><b>Experimental (Nightly Build)</b></td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-win-x64-0.4.4-170.exe'>
<a href='https://delta.jan.ai/latest/jan-win-x64-0.4.5-216.exe'>
<img src='./docs/static/img/windows.png' style="height:14px; width: 14px" />
<b>jan.exe</b>
</a>
</td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.4-170.dmg'>
<a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.5-216.dmg'>
<img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
<b>Intel</b>
</a>
</td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.4-170.dmg'>
<a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.5-216.dmg'>
<img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
<b>M1/M2</b>
</a>
</td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.4-170.deb'>
<a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.5-216.deb'>
<img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
<b>jan.deb</b>
</a>
</td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.4-170.AppImage'>
<a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.5-216.AppImage'>
<img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
<b>jan.AppImage</b>
</a>

core/.prettierignore (new file, 5 lines)
View File

@ -0,0 +1,5 @@
.next/
node_modules/
dist/
*.hbs
*.mdx

View File

@ -3,7 +3,6 @@
* @description Enum of all the routes exposed by the app
*/
export enum AppRoute {
appDataPath = 'appDataPath',
openExternalUrl = 'openExternalUrl',
openAppDirectory = 'openAppDirectory',
openFileExplore = 'openFileExplorer',
@ -12,6 +11,7 @@ export enum AppRoute {
updateAppConfiguration = 'updateAppConfiguration',
relaunch = 'relaunch',
joinPath = 'joinPath',
isSubdirectory = 'isSubdirectory',
baseName = 'baseName',
startServer = 'startServer',
stopServer = 'stopServer',
@ -61,7 +61,9 @@ export enum FileManagerRoute {
syncFile = 'syncFile',
getJanDataFolderPath = 'getJanDataFolderPath',
getResourcePath = 'getResourcePath',
getUserHomePath = 'getUserHomePath',
fileStat = 'fileStat',
writeBlob = 'writeBlob',
}
export type ApiFunction = (...args: any[]) => any

View File

@ -22,7 +22,11 @@ const executeOnMain: (extension: string, method: string, ...args: any[]) => Prom
* @param {object} network - Optional object specifying a proxy and whether to ignore SSL certificates.
* @returns {Promise<any>} A promise that resolves when the file is downloaded.
*/
const downloadFile: (url: string, fileName: string, network?: { proxy?: string, ignoreSSL?: boolean }) => Promise<any> = (url, fileName, network) => {
const downloadFile: (
url: string,
fileName: string,
network?: { proxy?: string; ignoreSSL?: boolean }
) => Promise<any> = (url, fileName, network) => {
return global.core?.api?.downloadFile(url, fileName, network)
}
@ -79,6 +83,12 @@ const openExternalUrl: (url: string) => Promise<any> = (url) =>
*/
const getResourcePath: () => Promise<string> = () => global.core.api?.getResourcePath()
/**
* Gets the user's home path.
* @returns {Promise<string>} A promise that resolves with the user's home path.
*/
const getUserHomePath = (): Promise<string> => global.core.api?.getUserHomePath()
/**
* Log to file from browser processes.
*
@ -87,6 +97,17 @@ const getResourcePath: () => Promise<string> = () => global.core.api?.getResourc
const log: (message: string, fileName?: string) => void = (message, fileName) =>
global.core.api?.log(message, fileName)
/**
* Check whether the path is a subdirectory of another path.
*
* @param from - The path to check.
* @param to - The path to check against.
*
* @returns {Promise<boolean>} - A promise that resolves with a boolean indicating whether the path is a subdirectory.
*/
const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: string, to: string) =>
global.core.api?.isSubdirectory(from, to)
/**
* Register extension point function type definition
*/
@ -94,7 +115,7 @@ export type RegisterExtensionPoint = (
extensionName: string,
extensionId: string,
method: Function,
priority?: number,
priority?: number
) => void
/**
@ -111,5 +132,7 @@ export {
openExternalUrl,
baseName,
log,
isSubdirectory,
getUserHomePath,
FileStat,
}
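The newly exported `isSubdirectory` and `getUserHomePath` proxy to main-process handlers added later in this diff. A minimal sketch of combining them to guard file access; the guard helper is illustrative, not part of this commit:

```typescript
import { getUserHomePath, isSubdirectory } from '@janhq/core'

// Hypothetical guard: refuse any path that escapes the user's home directory.
const assertInsideHome = async (candidate: string): Promise<void> => {
  const home = await getUserHomePath()
  const inside = await isSubdirectory(home, candidate)
  if (!inside) {
    throw new Error(`Path is outside the home directory: ${candidate}`)
  }
}
```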

View File

@ -1,4 +1,4 @@
import { FileStat } from "./types"
import { FileStat } from './types'
/**
* Writes data to a file at the specified path.
@ -6,6 +6,15 @@ import { FileStat } from "./types"
*/
const writeFileSync = (...args: any[]) => global.core.api?.writeFileSync(...args)
/**
* Writes blob data to a file at the specified path.
* @param path - The path to file.
* @param data - The blob data.
* @returns {Promise<any>} A promise that resolves when the blob data has been written.
*/
const writeBlob: (path: string, data: string) => Promise<any> = (path, data) =>
global.core.api?.writeBlob(path, data)
/**
* Reads the contents of a file at the specified path.
* @returns {Promise<any>} A Promise that resolves with the contents of the file.
@ -60,7 +69,6 @@ const syncFile: (src: string, dest: string) => Promise<any> = (src, dest) =>
*/
const copyFileSync = (...args: any[]) => global.core.api?.copyFileSync(...args)
/**
* Gets the file's stats.
*
@ -70,7 +78,6 @@ const copyFileSync = (...args: any[]) => global.core.api?.copyFileSync(...args)
const fileStat: (path: string) => Promise<FileStat | undefined> = (path) =>
global.core.api?.fileStat(path)
// TODO: Export `dummy` fs functions automatically
// Currently adding these manually
export const fs = {
@ -84,5 +91,6 @@ export const fs = {
appendFileSync,
copyFileSync,
syncFile,
fileStat
fileStat,
writeBlob,
}
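`writeBlob` expects base64-encoded data; the Electron handler later in this diff decodes it with `Buffer.from(data, 'base64')` and writes relative to the Jan data folder. A sketch of saving a browser `Blob` through it, with a hypothetical encoding helper:

```typescript
import { fs } from '@janhq/core'

// Hypothetical helper: base64-encode a Blob before handing it to fs.writeBlob.
// Fine for small files; chunk the conversion for large ones.
const saveBlob = async (blob: Blob, relativePath: string): Promise<void> => {
  const bytes = new Uint8Array(await blob.arrayBuffer())
  const base64 = btoa(String.fromCharCode(...bytes))
  await fs.writeBlob(relativePath, base64)
}
```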

View File

@ -2,7 +2,8 @@ import fs from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path'
import { ContentType, MessageStatus, Model, ThreadMessage } from './../../../index'
import { getJanDataFolderPath } from '../../utils'
import { getEngineConfiguration, getJanDataFolderPath } from '../../utils'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
export const getBuilder = async (configuration: RouteConfiguration) => {
const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
@ -265,19 +266,22 @@ export const downloadModel = async (
const modelBinaryPath = join(directoryPath, modelId)
const request = require('request')
const rq = request({ url: model.source_url, strictSSL, proxy })
const progress = require('request-progress')
progress(rq, {})
.on('progress', function (state: any) {
console.log('progress', JSON.stringify(state, null, 2))
})
.on('error', function (err: Error) {
console.error('error', err)
})
.on('end', function () {
console.log('end')
})
.pipe(fs.createWriteStream(modelBinaryPath))
for (const source of model.sources) {
const rq = request({ url: source, strictSSL, proxy })
progress(rq, {})
.on('progress', function (state: any) {
console.debug('progress', JSON.stringify(state, null, 2))
})
.on('error', function (err: Error) {
console.error('error', err)
})
.on('end', function () {
console.debug('end')
})
.pipe(fs.createWriteStream(modelBinaryPath))
}
return {
message: `Starting download ${modelId}`,
@ -306,7 +310,7 @@ export const chatCompletions = async (request: any, reply: any) => {
const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
let apiKey: string | undefined = undefined
let apiUrl: string = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' // default nitro url
let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
if (engineConfiguration) {
apiKey = engineConfiguration.api_key
@ -317,7 +321,7 @@ export const chatCompletions = async (request: any, reply: any) => {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
"Access-Control-Allow-Origin": "*"
'Access-Control-Allow-Origin': '*',
})
const headers: Record<string, any> = {
@ -343,13 +347,3 @@ export const chatCompletions = async (request: any, reply: any) => {
response.body.pipe(reply.raw)
}
}
const getEngineConfiguration = async (engineId: string) => {
if (engineId !== 'openai') {
return undefined
}
const directoryPath = join(getJanDataFolderPath(), 'engines')
const filePath = join(directoryPath, `${engineId}.json`)
const data = await fs.readFileSync(filePath, 'utf-8')
return JSON.parse(data)
}

View File

@ -0,0 +1,19 @@
// The PORT to use for the Nitro subprocess
export const NITRO_DEFAULT_PORT = 3928
// The HOST address to use for the Nitro subprocess
export const LOCAL_HOST = '127.0.0.1'
export const SUPPORTED_MODEL_FORMAT = '.gguf'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
// The URL for the Nitro subprocess to load a model
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
// The URL for the Nitro subprocess to validate a model
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
// The URL for the Nitro subprocess to kill itself
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
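These constants replace endpoint strings that were previously inlined (e.g. the default chat completion URL in `builder.ts` above). A sketch of a non-streaming request against `DEFAULT_CHAT_COMPLETION_URL`, assuming Nitro is running with a model loaded; the payload shape follows the OpenAI-compatible convention and is an assumption here:

```typescript
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'

const complete = async (prompt: string): Promise<string> => {
  const res = await fetch(DEFAULT_CHAT_COMPLETION_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: [{ role: 'user', content: prompt }],
      stream: false,
    }),
  })
  const body = await res.json()
  // Assumes an OpenAI-compatible response shape.
  return body.choices?.[0]?.message?.content ?? ''
}
```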

View File

@ -0,0 +1,351 @@
import fs from 'fs'
import { join } from 'path'
import { getJanDataFolderPath, getJanExtensionsPath, getSystemResourceInfo } from '../../utils'
import { logServer } from '../../log'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import { Model, ModelSettingParams, PromptTemplate } from '../../../types'
import {
LOCAL_HOST,
NITRO_DEFAULT_PORT,
NITRO_HTTP_KILL_URL,
NITRO_HTTP_LOAD_MODEL_URL,
NITRO_HTTP_VALIDATE_MODEL_URL,
SUPPORTED_MODEL_FORMAT,
} from './consts'
// The subprocess instance for Nitro
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
// TODO: move this to core type
interface NitroModelSettings extends ModelSettingParams {
llama_model_path: string
cpu_threads: number
}
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
try {
await runModel(modelId, settingParams)
return {
message: `Model ${modelId} started`,
}
} catch (e) {
return {
error: e,
}
}
}
const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise<void> => {
const janDataFolderPath = getJanDataFolderPath()
const modelFolderFullPath = join(janDataFolderPath, 'models', modelId)
if (!fs.existsSync(modelFolderFullPath)) {
throw `Model not found: ${modelId}`
}
const files: string[] = fs.readdirSync(modelFolderFullPath)
// Look for GGUF model file
const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT))
const modelMetadataPath = join(modelFolderFullPath, 'model.json')
const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8'))
if (!ggufBinFile) {
throw 'No GGUF model file found'
}
const modelBinaryPath = join(modelFolderFullPath, ggufBinFile)
const nitroResourceProbe = await getSystemResourceInfo()
const nitroModelSettings: NitroModelSettings = {
...modelMetadata.settings,
...settingParams,
llama_model_path: modelBinaryPath,
// This is critical and requires the real physical CPU core count (or performance-core count)
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
...(modelMetadata.settings.mmproj && {
mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
}),
}
logServer(`[NITRO]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`)
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
if (modelMetadata.settings.prompt_template) {
const promptTemplate = modelMetadata.settings.prompt_template
const prompt = promptTemplateConverter(promptTemplate)
if (prompt?.error) {
return Promise.reject(prompt.error)
}
nitroModelSettings.system_prompt = prompt.system_prompt
nitroModelSettings.user_prompt = prompt.user_prompt
nitroModelSettings.ai_prompt = prompt.ai_prompt
}
await runNitroAndLoadModel(modelId, nitroModelSettings)
}
// TODO: move to util
const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
// Split the string using the markers
const systemMarker = '{system_message}'
const promptMarker = '{prompt}'
if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
// Find the indices of the markers
const systemIndex = promptTemplate.indexOf(systemMarker)
const promptIndex = promptTemplate.indexOf(promptMarker)
// Extract the parts of the string
const system_prompt = promptTemplate.substring(0, systemIndex)
const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
// Return the split parts
return { system_prompt, user_prompt, ai_prompt }
} else if (promptTemplate.includes(promptMarker)) {
// Extract the parts of the string for the case where only promptMarker is present
const promptIndex = promptTemplate.indexOf(promptMarker)
const user_prompt = promptTemplate.substring(0, promptIndex)
const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
// Return the split parts
return { user_prompt, ai_prompt }
}
// Return an error if none of the conditions are met
return { error: 'Cannot split prompt template' }
}
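For example, a Zephyr-style template (illustrative, not from this commit) splits as follows:

```typescript
// '<|system|>\n' + {system_message} + '</s>\n<|user|>\n' + {prompt} + '</s>\n<|assistant|>\n'
const parts = promptTemplateConverter(
  '<|system|>\n{system_message}</s>\n<|user|>\n{prompt}</s>\n<|assistant|>\n'
)
// parts.system_prompt === '<|system|>\n'
// parts.user_prompt   === '</s>\n<|user|>\n'
// parts.ai_prompt     === '</s>\n<|assistant|>\n'
```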
const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
// Stop any running model and wait for the Nitro port to be freed before spawning a new subprocess
const tcpPortUsed = require('tcp-port-used')
await stopModel(modelId)
await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
/**
* There is a problem with Windows process manager
* Should wait for a while to make sure the port is free and the subprocess is killed
* The tested threshold is 500ms
**/
if (process.platform === 'win32') {
await new Promise((resolve) => setTimeout(resolve, 500))
}
await spawnNitroProcess()
await loadLLMModel(modelSettings)
await validateModelStatus()
}
const spawnNitroProcess = async (): Promise<void> => {
logServer(`[NITRO]::Debug: Spawning Nitro subprocess...`)
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'dist',
'bin'
)
let executableOptions = executableNitroFile()
const tcpPortUsed = require('tcp-port-used')
const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
// Execute the binary
logServer(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
)
subprocess = spawn(
executableOptions.executablePath,
['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
{
cwd: binaryFolder,
env: {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
)
// Handle subprocess output
subprocess.stdout.on('data', (data: any) => {
logServer(`[NITRO]::Debug: ${data}`)
})
subprocess.stderr.on('data', (data: any) => {
logServer(`[NITRO]::Error: ${data}`)
})
subprocess.on('close', (code: any) => {
logServer(`[NITRO]::Debug: Nitro exited with code: ${code}`)
subprocess = undefined
})
tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
logServer(`[NITRO]::Debug: Nitro is ready`)
})
}
type NitroExecutableOptions = {
executablePath: string
cudaVisibleDevices: string
}
const executableNitroFile = (): NitroExecutableOptions => {
const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'dist',
'bin'
)
let cudaVisibleDevices = ''
let binaryName = 'nitro'
/**
* The binary folder is different for each platform.
*/
if (process.platform === 'win32') {
/**
* For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
*/
let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
if (nvidiaInfo['run_mode'] === 'cpu') {
binaryFolder = join(binaryFolder, 'win-cpu')
} else {
if (nvidiaInfo['cuda'].version === '12') {
binaryFolder = join(binaryFolder, 'win-cuda-12-0')
} else {
binaryFolder = join(binaryFolder, 'win-cuda-11-7')
}
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
}
binaryName = 'nitro.exe'
} else if (process.platform === 'darwin') {
/**
* For macOS: mac-arm64 (Apple Silicon), mac-x64 (Intel)
*/
if (process.arch === 'arm64') {
binaryFolder = join(binaryFolder, 'mac-arm64')
} else {
binaryFolder = join(binaryFolder, 'mac-x64')
}
} else {
/**
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
*/
let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
if (nvidiaInfo['run_mode'] === 'cpu') {
binaryFolder = join(binaryFolder, 'linux-cpu')
} else {
if (nvidiaInfo['cuda'].version === '12') {
binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
} else {
binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
}
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
}
}
return {
executablePath: join(binaryFolder, binaryName),
cudaVisibleDevices,
}
}
const validateModelStatus = async (): Promise<void> => {
// Send a GET request to the validation URL.
// Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
const fetchRT = require('fetch-retry')
const fetchRetry = fetchRT(fetch)
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
retries: 5,
retryDelay: 500,
}).then(async (res: Response) => {
logServer(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
// If the response is OK, check model_loaded status.
if (res.ok) {
const body = await res.json()
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
return Promise.resolve()
}
}
return Promise.reject('Validate model status failed')
})
}
const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
logServer(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
const fetchRT = require('fetch-retry')
const fetchRetry = fetchRT(fetch)
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(settings),
retries: 3,
retryDelay: 500,
})
.then((res: any) => {
logServer(`[NITRO]::Debug: Load model success with response ${JSON.stringify(res)}`)
return Promise.resolve(res)
})
.catch((err: any) => {
logServer(`[NITRO]::Error: Load model failed with error ${err}`)
return Promise.reject(err)
})
}
/**
* Stop model and kill nitro process.
*/
export const stopModel = async (_modelId: string) => {
if (!subprocess) {
return {
error: "Model isn't running",
}
}
return new Promise((resolve, reject) => {
const controller = new AbortController()
setTimeout(() => {
controller.abort()
reject({
error: 'Failed to stop model: Timed out',
})
}, 5000)
const tcpPortUsed = require('tcp-port-used')
logServer(`[NITRO]::Debug: Request to kill Nitro`)
fetch(NITRO_HTTP_KILL_URL, {
method: 'DELETE',
signal: controller.signal,
})
.then(() => {
subprocess?.kill()
subprocess = undefined
})
.catch(() => {
// No need to handle the error; the subprocess is killed regardless
})
.then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
.then(() => logServer(`[NITRO]::Debug: Nitro process is terminated`))
.then(() =>
resolve({
message: 'Model stopped',
})
)
})
}

View File

@ -10,6 +10,8 @@ import {
} from '../common/builder'
import { JanApiRouteConfiguration } from '../common/configuration'
import { startModel, stopModel } from '../common/startStopModel'
import { ModelSettingParams } from '../../../types'
export const commonRouter = async (app: HttpServer) => {
// Common Routes
@ -17,19 +19,33 @@ export const commonRouter = async (app: HttpServer) => {
app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key]))
app.get(`/${key}/:id`, async (request: any) =>
retrieveBuilder(JanApiRouteConfiguration[key], request.params.id),
retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
)
app.delete(`/${key}/:id`, async (request: any) =>
deleteBuilder(JanApiRouteConfiguration[key], request.params.id),
deleteBuilder(JanApiRouteConfiguration[key], request.params.id)
)
})
// Download Model Routes
app.get(`/models/download/:modelId`, async (request: any) =>
downloadModel(request.params.modelId, { ignoreSSL: request.query.ignoreSSL === 'true', proxy: request.query.proxy }),
downloadModel(request.params.modelId, {
ignoreSSL: request.query.ignoreSSL === 'true',
proxy: request.query.proxy,
})
)
app.put(`/models/:modelId/start`, async (request: any) => {
let settingParams: ModelSettingParams | undefined = undefined
if (Object.keys(request.body).length !== 0) {
settingParams = JSON.parse(request.body) as ModelSettingParams
}
return startModel(request.params.modelId, settingParams)
})
app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId))
// Chat Completion Routes
app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply))
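The new start/stop routes expose the model lifecycle over the local API server. A sketch of driving them from a client; the base URL and model id are placeholders:

```typescript
// Assumed local API server address; adjust to your configuration.
const BASE = 'http://localhost:1337'

const restartModel = async (modelId: string): Promise<void> => {
  // Start the model; the optional body overrides ModelSettingParams fields.
  await fetch(`${BASE}/models/${modelId}/start`, {
    method: 'PUT',
    body: JSON.stringify({ ctx_len: 2048 }),
  })
  // Stop it again.
  await fetch(`${BASE}/models/${modelId}/stop`, { method: 'PUT' })
}
```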

View File

@ -8,5 +8,7 @@ export const fsRouter = async (app: HttpServer) => {
app.post(`/app/${FileManagerRoute.getResourcePath}`, async (request: any, reply: any) => {})
app.post(`/app/${FileManagerRoute.getUserHomePath}`, async (request: any, reply: any) => {})
app.post(`/app/${FileManagerRoute.fileStat}`, async (request: any, reply: any) => {})
}

View File

@ -2,6 +2,7 @@ import { FileSystemRoute } from '../../../api'
import { join } from 'path'
import { HttpServer } from '../HttpServer'
import { getJanDataFolderPath } from '../../utils'
import { normalizeFilePath } from '../../path'
export const fsRouter = async (app: HttpServer) => {
const moduleName = 'fs'
@ -13,10 +14,10 @@ export const fsRouter = async (app: HttpServer) => {
const result = await import(moduleName).then((mdl) => {
return mdl[route](
...body.map((arg: any) =>
typeof arg === 'string' && arg.includes('file:/')
? join(getJanDataFolderPath(), arg.replace('file:/', ''))
: arg,
),
typeof arg === 'string' && (arg.startsWith(`file:/`) || arg.startsWith(`file:\\`))
? join(getJanDataFolderPath(), normalizeFilePath(arg))
: arg
)
)
})
res.status(200).send(result)
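Switching from `includes` to `startsWith` means only arguments that actually begin with the `file:/` or `file:\` prefix are remapped, and `normalizeFilePath` strips that prefix before joining onto the Jan data folder. A sketch of the mapping with a stand-in for `normalizeFilePath`, whose body is not part of this hunk:

```typescript
import { join } from 'path'

// Stand-in for normalizeFilePath from @janhq/core/node: strip the leading
// file:/ or file:\ prefix so the remainder joins onto the data folder.
const stripFilePrefix = (p: string): string => p.replace(/^file:[\\/]+/, '')

const janDataFolder = '/home/user/jan' // placeholder data folder
console.log(join(janDataFolder, stripFilePrefix('file://models/model.json')))
// -> /home/user/jan/models/model.json
```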

View File

@ -1,16 +1,18 @@
import { AppConfiguration } from "../../types";
import { join } from "path";
import fs from "fs";
import os from "os";
import { AppConfiguration, SystemResourceInfo } from '../../types'
import { join } from 'path'
import fs from 'fs'
import os from 'os'
import { log, logServer } from '../log'
import childProcess from 'child_process'
// TODO: move this to core
const configurationFileName = "settings.json";
const configurationFileName = 'settings.json'
// TODO: do not specify the app name in the framework module
const defaultJanDataFolder = join(os.homedir(), "jan");
const defaultJanDataFolder = join(os.homedir(), 'jan')
const defaultAppConfig: AppConfiguration = {
data_folder: defaultJanDataFolder,
};
}
/**
* Getting App Configurations.
@ -20,39 +22,39 @@ const defaultAppConfig: AppConfiguration = {
export const getAppConfigurations = (): AppConfiguration => {
// Retrieve Application Support folder path
// Fallback to user home directory if not found
const configurationFile = getConfigurationFilePath();
const configurationFile = getConfigurationFilePath()
if (!fs.existsSync(configurationFile)) {
// create default app config if we don't have one
console.debug(`App config not found, creating default config at ${configurationFile}`);
fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig));
return defaultAppConfig;
console.debug(`App config not found, creating default config at ${configurationFile}`)
fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig))
return defaultAppConfig
}
try {
const appConfigurations: AppConfiguration = JSON.parse(
fs.readFileSync(configurationFile, "utf-8"),
);
return appConfigurations;
fs.readFileSync(configurationFile, 'utf-8')
)
return appConfigurations
} catch (err) {
console.error(`Failed to read app config, return default config instead! Err: ${err}`);
return defaultAppConfig;
console.error(`Failed to read app config, return default config instead! Err: ${err}`)
return defaultAppConfig
}
};
}
const getConfigurationFilePath = () =>
join(
global.core?.appPath() || process.env[process.platform == "win32" ? "USERPROFILE" : "HOME"],
configurationFileName,
);
global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'],
configurationFileName
)
export const updateAppConfiguration = (configuration: AppConfiguration): Promise<void> => {
const configurationFile = getConfigurationFilePath();
console.debug("updateAppConfiguration, configurationFile: ", configurationFile);
const configurationFile = getConfigurationFilePath()
console.debug('updateAppConfiguration, configurationFile: ', configurationFile)
fs.writeFileSync(configurationFile, JSON.stringify(configuration));
return Promise.resolve();
};
fs.writeFileSync(configurationFile, JSON.stringify(configuration))
return Promise.resolve()
}
/**
* Utility function to get server log path
@ -60,13 +62,13 @@ export const updateAppConfiguration = (configuration: AppConfiguration): Promise
* @returns {string} The log path.
*/
export const getServerLogPath = (): string => {
const appConfigurations = getAppConfigurations();
const logFolderPath = join(appConfigurations.data_folder, "logs");
const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true });
fs.mkdirSync(logFolderPath, { recursive: true })
}
return join(logFolderPath, "server.log");
};
return join(logFolderPath, 'server.log')
}
/**
* Utility function to get app log path
@ -74,13 +76,13 @@ export const getServerLogPath = (): string => {
* @returns {string} The log path.
*/
export const getAppLogPath = (): string => {
const appConfigurations = getAppConfigurations();
const logFolderPath = join(appConfigurations.data_folder, "logs");
const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true });
fs.mkdirSync(logFolderPath, { recursive: true })
}
return join(logFolderPath, "app.log");
};
return join(logFolderPath, 'app.log')
}
/**
* Utility function to get data folder path
@ -88,9 +90,9 @@ export const getAppLogPath = (): string => {
* @returns {string} The data folder path.
*/
export const getJanDataFolderPath = (): string => {
const appConfigurations = getAppConfigurations();
return appConfigurations.data_folder;
};
const appConfigurations = getAppConfigurations()
return appConfigurations.data_folder
}
/**
* Utility function to get extension path
@ -98,6 +100,70 @@ export const getJanDataFolderPath = (): string => {
* @returns {string} The extensions path.
*/
export const getJanExtensionsPath = (): string => {
const appConfigurations = getAppConfigurations();
return join(appConfigurations.data_folder, "extensions");
};
const appConfigurations = getAppConfigurations()
return join(appConfigurations.data_folder, 'extensions')
}
/**
* Utility function to get the physical CPU count
*
* @returns {Promise<number>} The physical CPU count.
*/
export const physicalCpuCount = async (): Promise<number> => {
const platform = os.platform()
if (platform === 'linux') {
const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
return parseInt(output.trim(), 10)
} else if (platform === 'darwin') {
const output = await exec('sysctl -n hw.physicalcpu_max')
return parseInt(output.trim(), 10)
} else if (platform === 'win32') {
const output = await exec('WMIC CPU Get NumberOfCores')
return output
.split(os.EOL)
.map((line: string) => parseInt(line))
.filter((value: number) => !isNaN(value))
.reduce((sum: number, number: number) => sum + number, 1)
} else {
const cores = os.cpus().filter((cpu: any, index: number) => {
const hasHyperthreading = cpu.model.includes('Intel')
const isOdd = index % 2 === 1
return !hasHyperthreading || isOdd
})
return cores.length
}
}
const exec = async (command: string): Promise<string> => {
return new Promise((resolve, reject) => {
childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => {
if (error) {
reject(error)
} else {
resolve(stdout)
}
})
})
}
export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
const cpu = await physicalCpuCount()
const message = `[NITRO]::CPU information - ${cpu}`
log(message)
logServer(message)
return {
numCpuPhysicalCore: cpu,
memAvailable: 0, // TODO: this should not be 0
}
}
export const getEngineConfiguration = async (engineId: string) => {
if (engineId !== 'openai') {
return undefined
}
const directoryPath = join(getJanDataFolderPath(), 'engines')
const filePath = join(directoryPath, `${engineId}.json`)
const data = fs.readFileSync(filePath, 'utf-8')
return JSON.parse(data)
}
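`getEngineConfiguration` moves here from `builder.ts` (removed earlier in this diff) so the server routes and other node-side callers can share it. A sketch of reading the OpenAI engine settings, assuming the function is exported alongside the other node utils and that `engines/openai.json` carries an `api_key` field as the `chatCompletions` handler suggests:

```typescript
import { getEngineConfiguration } from '@janhq/core/node'

const describeEngine = async (): Promise<void> => {
  const engine = await getEngineConfiguration('openai')
  if (engine) {
    // api_key mirrors the field read by the chatCompletions handler.
    console.log('OpenAI engine configured:', Boolean(engine.api_key))
  }
}
```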

View File

@ -2,6 +2,13 @@
* Assistant type defines the shape of an assistant object.
* @stored
*/
export type AssistantTool = {
type: string
enabled: boolean
settings: any
}
export type Assistant = {
/** Represents the avatar of the user. */
avatar: string
@ -22,7 +29,7 @@ export type Assistant = {
/** Represents the instructions for the object. */
instructions?: string
/** Represents the tools associated with the object. */
tools?: any
tools?: AssistantTool[]
/** Represents the file identifiers associated with the object. */
file_ids: string[]
/** Represents the metadata of the object. */

View File

@ -6,3 +6,4 @@ export * from './inference'
export * from './monitoring'
export * from './file'
export * from './config'
export * from './miscellaneous'

View File

@ -1,3 +1,5 @@
import { ContentType, ContentValue } from '../message'
/**
* The role of the author of this message.
*/
@ -13,7 +15,32 @@ export enum ChatCompletionRole {
*/
export type ChatCompletionMessage = {
/** The contents of the message. **/
content?: string
content?: ChatCompletionMessageContent
/** The role of the author of this message. **/
role: ChatCompletionRole
}
export type ChatCompletionMessageContent =
| string
| (ChatCompletionMessageContentText &
ChatCompletionMessageContentImage &
ChatCompletionMessageContentDoc)[]
export enum ChatCompletionMessageContentType {
Text = 'text',
Image = 'image_url',
Doc = 'doc_url',
}
export type ChatCompletionMessageContentText = {
type: ChatCompletionMessageContentType
text: string
}
export type ChatCompletionMessageContentImage = {
type: ChatCompletionMessageContentType
image_url: { url: string }
}
export type ChatCompletionMessageContentDoc = {
type: ChatCompletionMessageContentType
doc_url: { url: string }
}
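With these additions, `content` may be a plain string or an array of typed parts. A sketch of a mixed text-and-image message; note the commit types the array elements as an intersection of the three part types (a union is what usage suggests), so the literal is left untyped here, and the image path is a placeholder:

```typescript
import { ChatCompletionMessageContentType } from '@janhq/core'

const message = {
  role: 'user',
  content: [
    {
      type: ChatCompletionMessageContentType.Text,
      text: 'What is in this image?',
    },
    {
      type: ChatCompletionMessageContentType.Image,
      image_url: { url: 'file://path/to/image.png' }, // placeholder path
    },
  ],
}
```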

View File

@ -1,5 +1,6 @@
import { ChatCompletionMessage, ChatCompletionRole } from '../inference'
import { ModelInfo } from '../model'
import { Thread } from '../thread'
/**
* The `ThreadMessage` type defines the shape of a thread's message object.
@ -35,7 +36,10 @@ export type ThreadMessage = {
export type MessageRequest = {
id?: string
/** The thread id of the message request. **/
/**
* @deprecated Use thread object instead
* The thread id of the message request.
*/
threadId: string
/**
@ -48,6 +52,10 @@ export type MessageRequest = {
/** Settings for constructing a chat completion request **/
model?: ModelInfo
/** The thread this message belongs to. **/
// TODO: deprecate threadId field
thread?: Thread
}
/**
@ -62,7 +70,7 @@ export enum MessageStatus {
/** Message loaded with error. **/
Error = 'error',
/** Message is cancelled streaming */
Stopped = "stopped"
Stopped = 'stopped',
}
/**
@ -71,6 +79,7 @@ export enum MessageStatus {
export enum ContentType {
Text = 'text',
Image = 'image',
Pdf = 'pdf',
}
/**
@ -80,6 +89,8 @@ export enum ContentType {
export type ContentValue = {
value: string
annotations: string[]
name?: string
size?: number
}
/**

View File

@ -0,0 +1,2 @@
export * from './systemResourceInfo'
export * from './promptTemplate'

View File

@ -0,0 +1,6 @@
export type PromptTemplate = {
system_prompt?: string
ai_prompt?: string
user_prompt?: string
error?: string
}

View File

@ -0,0 +1,4 @@
export type SystemResourceInfo = {
numCpuPhysicalCore: number
memAvailable: number
}

View File

@ -7,6 +7,7 @@ export type ModelInfo = {
settings: ModelSettingParams
parameters: ModelRuntimeParams
engine?: InferenceEngine
proxyEngine?: InferenceEngine
}
/**
@ -18,7 +19,13 @@ export enum InferenceEngine {
nitro = 'nitro',
openai = 'openai',
triton_trtllm = 'triton_trtllm',
hf_endpoint = 'hf_endpoint',
tool_retrieval_enabled = 'tool_retrieval_enabled',
}
export type ModelArtifact = {
filename: string
url: string
}
/**
@ -45,7 +52,7 @@ export type Model = {
/**
* The model download source. It can be an external url or a local filepath.
*/
source_url: string
sources: ModelArtifact[]
/**
* The model identifier, which can be referenced in the API endpoints.
@ -85,6 +92,13 @@ export type Model = {
* The model engine.
*/
engine: InferenceEngine
proxyEngine?: InferenceEngine
/**
* Is multimodal or not.
*/
visionModel?: boolean
}
export type ModelMetadata = {
@ -107,6 +121,9 @@ export type ModelSettingParams = {
system_prompt?: string
ai_prompt?: string
user_prompt?: string
llama_model_path?: string
mmproj?: string
cont_batching?: boolean
}
/**
@ -122,4 +139,5 @@ export type ModelRuntimeParams = {
stop?: string[]
frequency_penalty?: number
presence_penalty?: number
engine?: string
}
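`source_url: string` becomes `sources: ModelArtifact[]`, so a model can reference several files, e.g. a GGUF binary plus an `mmproj` projector for vision models. A sketch of the new shape; the filename and URL are placeholders:

```typescript
import { ModelArtifact } from '@janhq/core'

const sources: ModelArtifact[] = [
  {
    filename: 'example-7b.Q4_K_M.gguf',
    url: 'https://huggingface.co/example/example-7b-GGUF/resolve/main/example-7b.Q4_K_M.gguf',
  },
]
```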

View File

@ -1,2 +1,3 @@
export * from './threadEntity'
export * from './threadInterface'
export * from './threadEvent'

View File

@ -1,3 +1,4 @@
import { AssistantTool } from '../assistant'
import { ModelInfo } from '../model'
/**
@ -30,6 +31,7 @@ export type ThreadAssistantInfo = {
assistant_name: string
model: ModelInfo
instructions?: string
tools?: AssistantTool[]
}
/**
@ -41,5 +43,4 @@ export type ThreadState = {
waitingForResponse: boolean
error?: Error
lastMessage?: string
isFinishInit?: boolean
}

View File

@ -0,0 +1,4 @@
export enum ThreadEvent {
/** The `OnThreadStarted` event is emitted when a thread is started. */
OnThreadStarted = 'OnThreadStarted',
}

View File

@ -1,5 +1,5 @@
GTM_ID=xxxx
POSTHOG_PROJECT_API_KEY=xxxx
POSTHOG_APP_URL=xxxx
UMAMI_PROJECT_API_KEY=xxxx
UMAMI_APP_URL=xxxx
ALGOLIA_API_KEY=xxxx
ALGOLIA_APP_ID=xxxx

View File

@ -56,7 +56,6 @@ jan/ # Jan root folder
- Each `model` folder contains a `model.json` file, which is a representation of a model.
- `model.json` contains metadata and default parameters used to run a model.
- The only required field is `source_url`.
### Example
@ -64,36 +63,43 @@ Here's a standard example `model.json` for a GGUF model.
```js
{
"id": "zephyr-7b", // Defaults to foldername
"object": "model", // Defaults to "model"
"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
"name": "Zephyr 7B", // Defaults to foldername
"owned_by": "you", // Defaults to "you"
"version": "1", // Defaults to 1
"created": 1231231, // Defaults to file creation time
"description": null, // Defaults to null
"state": enum[null, "ready"]
"format": "ggufv3", // Defaults to "ggufv3"
"engine": "nitro", // engine_id specified in jan/engine folder
"engine_parameters": { // Engine parameters inside model.json can override
"ctx_len": 2048, // the value inside the base engine.json
"id": "zephyr-7b", // Defaults to foldername
"object": "model", // Defaults to "model"
"sources": [
{
"filename": "zephyr-7b-beta.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
}
],
"name": "Zephyr 7B", // Defaults to foldername
"owned_by": "you", // Defaults to "you"
"version": "1", // Defaults to 1
"created": 1231231, // Defaults to file creation time
"description": null, // Defaults to null
"format": "ggufv3", // Defaults to "ggufv3"
"engine": "nitro", // engine_id specified in jan/engine folder
"engine_parameters": {
// Engine parameters inside model.json can override
"ctx_len": 4096, // the value inside the base engine.json
"ngl": 100,
"embedding": true,
"n_parallel": 4,
},
"model_parameters": { // Models are called parameters
"n_parallel": 4
},
"model_parameters": {
// Runtime parameters passed to the model
"stream": true,
"max_tokens": 2048,
"stop": ["<endofstring>"], // This usually can be left blank, only used with specific need from model author
"max_tokens": 4096,
"stop": ["<endofstring>"], // This usually can be left blank, only used with specific need from model author
"frequency_penalty": 0,
"presence_penalty": 0,
"temperature": 0.7,
"top_p": 0.95
},
"metadata": {}, // Defaults to {}
"assets": [ // Defaults to current dir
"file://.../zephyr-7b-q4_k_m.bin",
]
},
"metadata": {}, // Defaults to {}
"assets": [
// Defaults to current dir
"file://.../zephyr-7b-q4_k_m.bin"
]
}
```

View File

@ -31,7 +31,6 @@ In this section, we will show you how to import a GGUF model from [HuggingFace](
## Manually Importing a Downloaded Model (nightly versions and v0.4.4+)
### 1. Create a Model Folder
Navigate to the `~/jan/models` folder. You can find this folder by going to `App Settings` > `Advanced` > `Open App Directory`.
@ -92,7 +91,7 @@ Drag and drop your model binary into this folder, ensuring the `modelname.gguf`
#### 3. Voila
If your model doesn't show up in the Model Selector in conversations, please restart the app.
If your model doesn't show up in the Model Selector in conversations, please restart the app.
If that doesn't work, please feel free to join our [Discord community](https://discord.gg/Dt7MxDyNNZ) for support, updates, and discussions.
@ -190,14 +189,18 @@ Edit `model.json` and include the following configurations:
- Ensure the filename is `model.json`.
- Ensure the `id` property matches the folder name you created.
- Ensure the GGUF filename matches the `id` property exactly.
- Ensure the `source_url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab.
- Ensure the `source.url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab.
- Ensure you are using the correct `prompt_template`. This is usually provided in the HuggingFace model's description page.
- Ensure the `state` property is set to `ready`.
```json title="model.json"
{
// highlight-start
"source_url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf",
"sources": [
{
"filename": "trinity-v1.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf"
}
],
"id": "trinity-v1-7b",
// highlight-end
"object": "model",
@ -208,7 +211,8 @@ Edit `model.json` and include the following configurations:
"settings": {
"ctx_len": 4096,
// highlight-next-line
"prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:"
"prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "trinity-v1.Q4_K_M.gguf"
},
"parameters": {
"max_tokens": 4096
@ -218,9 +222,7 @@ Edit `model.json` and include the following configurations:
"tags": ["7B", "Merged"],
"size": 4370000000
},
"engine": "nitro",
// highlight-next-line
"state": "ready"
"engine": "nitro"
}
```

View File

@ -40,7 +40,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k`
```json title="~/jan/models/gpt-3.5-turbo-16k/model.json"
{
"source_url": "https://openai.com",
"sources": [
{
"filename": "openai",
"url": "https://openai.com"
}
],
// highlight-next-line
"id": "gpt-3.5-turbo-16k",
"object": "model",
@ -55,8 +60,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k`
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai",
"state": "ready"
"engine": "openai"
// highlight-end
}
```
@ -118,7 +122,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4`
```json title="~/jan/models/mistral-ins-7b-q4/model.json"
{
"source_url": "https://jan.ai",
"sources": [
{
"filename": "janai",
"url": "https://jan.ai"
}
],
// highlight-next-line
"id": "mistral-ins-7b-q4",
"object": "model",
@ -134,8 +143,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4`
"tags": ["remote", "awesome"]
},
// highlight-start
"engine": "openai",
"state": "ready"
"engine": "openai"
// highlight-end
}
```

View File

@ -49,7 +49,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `<openrouter-modeln
```json title="~/jan/models/openrouter-dolphin-mixtral-8x7b/model.json"
{
"source_url": "https://openrouter.ai/",
"sources": [
{
"filename": "openrouter",
"url": "https://openrouter.ai/"
}
],
"id": "cognitivecomputations/dolphin-mixtral-8x7b",
"object": "model",
"name": "Dolphin 2.6 Mixtral 8x7B",
@ -63,8 +68,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `<openrouter-modeln
"tags": ["General", "Big Context Length"]
},
// highlight-start
"engine": "openai",
"state": "ready"
"engine": "openai"
// highlight-end
}
```

View File

@ -59,7 +59,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `<your-deployment-n
```json title="~/jan/models/gpt-35-hieu-jan/model.json"
{
"source_url": "https://hieujan.openai.azure.com",
"sources": [
{
"filename": "azure_openai",
"url": "https://hieujan.openai.azure.com"
}
],
// highlight-next-line
"id": "gpt-35-hieu-jan",
"object": "model",
@ -75,8 +80,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `<your-deployment-n
"tags": ["General", "Big Context Length"]
},
// highlight-start
"engine": "openai",
"state": "ready"
"engine": "openai"
// highlight-end
}
```

View File

@ -59,18 +59,26 @@ components:
type: string
description: State format of the model, distinct from the engine.
example: ggufv3
source_url:
type: string
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
source:
type: array
items:
type: object
properties:
url:
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
filename:
type: string
description: Filename of the model.
example: zephyr-7b-beta.Q4_K_M.gguf
settings:
type: object
properties:
ctx_len:
type: string
description: Context length.
example: "2048"
example: "4096"
ngl:
type: string
description: Number of layers.
@ -94,7 +102,7 @@ components:
token_limit:
type: string
description: Token limit for the model.
example: "2048"
example: "4096"
top_k:
type: string
description: Top-k setting for the model.
@ -117,7 +125,7 @@ components:
type: string
description: List of assets related to the model.
required:
- source_url
- source
ModelObject:
type: object
properties:
@ -169,11 +177,19 @@ components:
- running
- stopped
description: The current state of the model.
source_url:
type: string
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
source:
type: array
items:
type: object
properties:
url:
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
filename:
type: string
description: Filename of the model.
example: zephyr-7b-beta.Q4_K_M.gguf
engine_parameters:
type: object
properties:
@ -208,8 +224,8 @@ components:
model.
minimum: 128
maximum: 4096
default: 2048
example: 2048
default: 4096
example: 4096
n_parallel:
type: integer
description: Number of parallel operations, relevant when continuous batching is
@ -241,8 +257,8 @@ components:
description: Maximum context length the model can handle.
minimum: 0
maximum: 4096
default: 2048
example: 2048
default: 4096
example: 4096
ngl:
type: integer
description: Number of layers in the neural network.
@ -276,8 +292,8 @@ components:
response.
minimum: 1
maximum: 4096
default: 2048
example: 2048
default: 4096
example: 4096
top_k:
type: integer
description: Limits the model to consider only the top k most likely next tokens
@ -318,7 +334,7 @@ components:
- created
- owned_by
- state
- source_url
- source
- parameters
- metadata
DeleteModelResponse:

View File

@ -1,5 +1,5 @@
import { app, ipcMain, dialog, shell } from 'electron'
import { join, basename } from 'path'
import { join, basename, relative as getRelative, isAbsolute } from 'path'
import { WindowManager } from './../managers/window'
import { getResourcePath } from './../utils/path'
import { AppRoute, AppConfiguration } from '@janhq/core'
@ -50,6 +50,27 @@ export function handleAppIPCs() {
join(...paths)
)
/**
* Checks if the given path is a subdirectory of the given directory.
*
* @param _event - The IPC event object.
* @param from - The path to check.
* @param to - The directory to check against.
*
* @returns {Promise<boolean>} - A promise that resolves with the result.
*/
ipcMain.handle(
AppRoute.isSubdirectory,
async (_event, from: string, to: string) => {
const relative = getRelative(from, to)
const isSubdir =
relative && !relative.startsWith('..') && !isAbsolute(relative)
if (isSubdir === '') return false
else return isSubdir
}
)
/**
* Retrieve basename from given path, respect to the current OS.
*/

View File

@ -1,4 +1,4 @@
import { ipcMain } from 'electron'
import { ipcMain, app } from 'electron'
// @ts-ignore
import reflect from '@alumna/reflect'
@ -38,6 +38,10 @@ export function handleFileMangerIPCs() {
getResourcePath()
)
ipcMain.handle(FileManagerRoute.getUserHomePath, async (_event) =>
app.getPath('home')
)
// handle fs is directory here
ipcMain.handle(
FileManagerRoute.fileStat,
@ -59,4 +63,20 @@ export function handleFileMangerIPCs() {
return fileStat
}
)
ipcMain.handle(
FileManagerRoute.writeBlob,
async (_event, path: string, data: string): Promise<void> => {
try {
const normalizedPath = normalizeFilePath(path)
const dataBuffer = Buffer.from(data, 'base64')
fs.writeFileSync(
join(getJanDataFolderPath(), normalizedPath),
dataBuffer
)
} catch (err) {
console.error(`writeBlob ${path} error: ${err}`)
}
}
)
}

View File

@ -1,9 +1,9 @@
import { ipcMain } from 'electron'
import { FileSystemRoute } from '@janhq/core'
import { join } from 'path'
import { getJanDataFolderPath, normalizeFilePath } from '@janhq/core/node'
import fs from 'fs'
import { FileManagerRoute, FileSystemRoute } from '@janhq/core'
import { join } from 'path'
/**
* Handles file system operations.
*/
@ -15,7 +15,7 @@ export function handleFsIPCs() {
mdl[route](
...args.map((arg) =>
typeof arg === 'string' &&
(arg.includes(`file:/`) || arg.includes(`file:\\`))
(arg.startsWith(`file:/`) || arg.startsWith(`file:\\`))
? join(getJanDataFolderPath(), normalizeFilePath(arg))
: arg
)

View File

@ -28,6 +28,22 @@ import { setupCore } from './utils/setup'
app
.whenReady()
.then(async () => {
if (!app.isPackaged) {
// Which means you're running from source code
const { default: installExtension, REACT_DEVELOPER_TOOLS } = await import(
'electron-devtools-installer'
) // Don't use import on top level, since the installer package is dev-only
try {
const name = await installExtension(REACT_DEVELOPER_TOOLS)
console.log(`Added Extension: ${name}`)
} catch (err) {
console.log('An error occurred while installing devtools:')
console.error(err)
// Only log the error and don't throw it because it's not critical
}
}
})
.then(setupCore)
.then(createUserSpace)
.then(migrateExtensions)

View File

@ -86,7 +86,7 @@
"request": "^2.88.2",
"request-progress": "^3.0.0",
"rimraf": "^5.0.5",
"typescript": "^5.3.3",
"typescript": "^5.2.2",
"ulid": "^2.3.0",
"use-debounce": "^9.0.4"
},
@ -99,6 +99,7 @@
"@typescript-eslint/parser": "^6.7.3",
"electron": "28.0.0",
"electron-builder": "^24.9.1",
"electron-devtools-installer": "^3.2.0",
"electron-playwright-helpers": "^1.6.0",
"eslint-plugin-react": "^7.33.2",
"run-script-os": "^1.1.6"

View File

@ -1,9 +1,9 @@
import { PlaywrightTestConfig } from "@playwright/test";
import { PlaywrightTestConfig } from '@playwright/test'
const config: PlaywrightTestConfig = {
testDir: "./tests",
testDir: './tests',
retries: 0,
timeout: 120000,
};
globalTimeout: 300000,
}
export default config;
export default config

View File

@ -9,6 +9,7 @@ import {
let electronApp: ElectronApplication
let page: Page
const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
test.beforeAll(async () => {
process.env.CI = 'e2e'
@ -26,7 +27,9 @@ test.beforeAll(async () => {
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
page = await electronApp.firstWindow({
timeout: TIMEOUT,
})
})
test.afterAll(async () => {
@ -34,8 +37,12 @@ test.afterAll(async () => {
await page.close()
})
test('explores models', async () => {
await page.getByTestId('Hub').first().click()
await page.getByTestId('testid-explore-models').isVisible()
// More test cases here...
test('explores hub', async () => {
test.setTimeout(TIMEOUT)
await page.getByTestId('Hub').first().click({
timeout: TIMEOUT,
})
await page.getByTestId('hub-container-test-id').isVisible({
timeout: TIMEOUT,
})
})

View File

@ -1,55 +0,0 @@
import { _electron as electron } from 'playwright'
import { ElectronApplication, Page, expect, test } from '@playwright/test'
import {
findLatestBuild,
parseElectronApp,
stubDialog,
} from 'electron-playwright-helpers'
let electronApp: ElectronApplication
let page: Page
test.beforeAll(async () => {
process.env.CI = 'e2e'
const latestBuild = findLatestBuild('dist')
expect(latestBuild).toBeTruthy()
// parse the packaged Electron app and find paths and other info
const appInfo = parseElectronApp(latestBuild)
expect(appInfo).toBeTruthy()
expect(appInfo.asar).toBe(true)
expect(appInfo.executable).toBeTruthy()
expect(appInfo.main).toBeTruthy()
expect(appInfo.name).toBe('jan')
expect(appInfo.packageJson).toBeTruthy()
expect(appInfo.packageJson.name).toBe('jan')
expect(appInfo.platform).toBeTruthy()
expect(appInfo.platform).toBe(process.platform)
expect(appInfo.resourcesDir).toBeTruthy()
electronApp = await electron.launch({
args: [appInfo.main], // main file from package.json
executablePath: appInfo.executable, // path to the Electron executable
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
})
test.afterAll(async () => {
await electronApp.close()
await page.close()
})
test('renders the home page', async () => {
expect(page).toBeDefined()
// Welcome text is available
const welcomeText = await page
.getByTestId('testid-welcome-title')
.first()
.isVisible()
expect(welcomeText).toBe(false)
})

View File

@ -9,6 +9,7 @@ import {
let electronApp: ElectronApplication
let page: Page
const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
test.beforeAll(async () => {
process.env.CI = 'e2e'
@ -26,7 +27,9 @@ test.beforeAll(async () => {
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
page = await electronApp.firstWindow({
timeout: TIMEOUT,
})
})
test.afterAll(async () => {
@ -35,20 +38,24 @@ test.afterAll(async () => {
})
test('renders left navigation panel', async () => {
// Chat section should be there
const chatSection = await page.getByTestId('Chat').first().isVisible()
expect(chatSection).toBe(false)
// Home actions
/* Disable unstable feature tests
** const botBtn = await page.getByTestId("Bot").first().isEnabled();
** Enable back when it is whitelisted
*/
test.setTimeout(TIMEOUT)
const systemMonitorBtn = await page
.getByTestId('System Monitor')
.first()
.isEnabled()
const settingsBtn = await page.getByTestId('Settings').first().isEnabled()
.isEnabled({
timeout: TIMEOUT,
})
const settingsBtn = await page
.getByTestId('Thread')
.first()
.isEnabled({ timeout: TIMEOUT })
expect([systemMonitorBtn, settingsBtn].filter((e) => !e).length).toBe(0)
// Chat section should be there
await page.getByTestId('Local API Server').first().click({
timeout: TIMEOUT,
})
const localServer = await page.getByTestId('local-server-testid').first()
await expect(localServer).toBeVisible({
timeout: TIMEOUT,
})
})

View File

@ -9,6 +9,7 @@ import {
let electronApp: ElectronApplication
let page: Page
const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
test.beforeAll(async () => {
process.env.CI = 'e2e'
@ -26,7 +27,9 @@ test.beforeAll(async () => {
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
page = await electronApp.firstWindow({
timeout: TIMEOUT,
})
})
test.afterAll(async () => {
@ -35,6 +38,8 @@ test.afterAll(async () => {
})
test('shows settings', async () => {
await page.getByTestId('Settings').first().click()
await page.getByTestId('testid-setting-description').isVisible()
test.setTimeout(TIMEOUT)
await page.getByTestId('Settings').first().click({ timeout: TIMEOUT })
const settingDescription = page.getByTestId('testid-setting-description')
await expect(settingDescription).toBeVisible({ timeout: TIMEOUT })
})

View File

@ -1,41 +0,0 @@
import { _electron as electron } from 'playwright'
import { ElectronApplication, Page, expect, test } from '@playwright/test'
import {
findLatestBuild,
parseElectronApp,
stubDialog,
} from 'electron-playwright-helpers'
let electronApp: ElectronApplication
let page: Page
test.beforeAll(async () => {
process.env.CI = 'e2e'
const latestBuild = findLatestBuild('dist')
expect(latestBuild).toBeTruthy()
// parse the packaged Electron app and find paths and other info
const appInfo = parseElectronApp(latestBuild)
expect(appInfo).toBeTruthy()
electronApp = await electron.launch({
args: [appInfo.main], // main file from package.json
executablePath: appInfo.executable, // path to the Electron executable
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
})
test.afterAll(async () => {
await electronApp.close()
await page.close()
})
test('shows system monitor', async () => {
await page.getByTestId('System Monitor').first().click()
await page.getByTestId('testid-system-monitor').isVisible()
// More test cases here...
})

View File

@ -3,26 +3,50 @@
"version": "1.0.0",
"description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models",
"main": "dist/index.js",
"module": "dist/module.js",
"node": "dist/node/index.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install"
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"build:publish:linux": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install",
"build:publish:darwin": "rimraf *.tgz --glob && npm run build && ../../.github/scripts/auto-sign.sh && npm pack && cpx *.tgz ../../electron/pre-install",
"build:publish:win32": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install",
"build:publish": "run-script-os"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-replace": "^5.0.5",
"@types/pdf-parse": "^1.1.4",
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4"
"rollup": "^2.38.5",
"rollup-plugin-define": "^1.0.1",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"typescript": "^5.3.3",
"run-script-os": "^1.1.6"
},
"dependencies": {
"@janhq/core": "file:../../core",
"@langchain/community": "0.0.13",
"hnswlib-node": "^1.4.2",
"langchain": "^0.0.214",
"path-browserify": "^1.0.1",
"pdf-parse": "^1.1.1",
"ts-loader": "^9.5.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"@janhq/core",
"@langchain/community",
"hnswlib-node",
"langchain",
"pdf-parse"
]
}

View File

@ -0,0 +1,81 @@
import resolve from "@rollup/plugin-node-resolve";
import commonjs from "@rollup/plugin-commonjs";
import sourceMaps from "rollup-plugin-sourcemaps";
import typescript from "rollup-plugin-typescript2";
import json from "@rollup/plugin-json";
import replace from "@rollup/plugin-replace";
const packageJson = require("./package.json");
export default [
{
input: `src/index.ts`,
output: [{ file: packageJson.main, format: "es", sourcemap: true }],
// Indicate here external modules you don't want to include in your bundle (i.e.: 'lodash')
external: [],
watch: {
include: "src/**",
},
plugins: [
replace({
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
EXTENSION_NAME: JSON.stringify(packageJson.name),
VERSION: JSON.stringify(packageJson.version),
}),
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: [".js", ".ts", ".svelte"],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
{
input: `src/node/index.ts`,
output: [{ dir: "dist/node", format: "cjs", sourcemap: false }],
// Indicate here external modules you don't want to include in your bundle (i.e.: 'lodash')
external: [
"@janhq/core/node",
"@langchain/community",
"langchain",
"langsmith",
"path",
"hnswlib-node",
],
watch: {
include: "src/node/**",
},
// inlineDynamicImports: true,
plugins: [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs({
ignoreDynamicRequires: true,
}),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: [".ts", ".js", ".json"],
}),
// Resolve source maps to the original source
// sourceMaps(),
],
},
];

View File

@ -1 +1,3 @@
declare const MODULE: string;
declare const NODE: string;
declare const EXTENSION_NAME: string;
declare const VERSION: string;
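These ambient declarations only satisfy the type checker; at build time rollup's replace() plugin (configured above) inlines each identifier as a string literal derived from package.json. A minimal sketch of that wiring, with illustrative package values:

import replace from "@rollup/plugin-replace";

// Illustrative values; the real ones come from require("./package.json")
const pkg = {
  name: "@janhq/assistant-extension",
  version: "1.0.0",
  node: "dist/node/index.js",
};

export const constants = replace({
  NODE: JSON.stringify(`${pkg.name}/${pkg.node}`), // consumed by executeOnMain(NODE, ...)
  EXTENSION_NAME: JSON.stringify(pkg.name), // localStorage migration key prefix
  VERSION: JSON.stringify(pkg.version), // migration version stamp
});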

View File

@ -1,16 +1,151 @@
import { fs, Assistant } from "@janhq/core";
import { AssistantExtension } from "@janhq/core";
import { join } from "path";
import {
fs,
Assistant,
MessageRequest,
events,
InferenceEngine,
MessageEvent,
InferenceEvent,
joinPath,
executeOnMain,
AssistantExtension,
} from "@janhq/core";
export default class JanAssistantExtension extends AssistantExtension {
private static readonly _homeDir = "file://assistants";
controller = new AbortController();
isCancelled = false;
retrievalThreadId: string | undefined = undefined;
async onLoad() {
// making the assistant directory
if (!(await fs.existsSync(JanAssistantExtension._homeDir)))
fs.mkdirSync(JanAssistantExtension._homeDir).then(() => {
this.createJanAssistant();
});
const assistantDirExist = await fs.existsSync(
JanAssistantExtension._homeDir,
);
if (
localStorage.getItem(`${EXTENSION_NAME}-version`) !== VERSION ||
!assistantDirExist
) {
if (!assistantDirExist)
await fs.mkdirSync(JanAssistantExtension._homeDir);
// Write assistant metadata
this.createJanAssistant();
// Finished migration
localStorage.setItem(`${EXTENSION_NAME}-version`, VERSION);
}
// Events subscription
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
JanAssistantExtension.handleMessageRequest(data, this),
);
events.on(InferenceEvent.OnInferenceStopped, () => {
JanAssistantExtension.handleInferenceStopped(this);
});
}
private static async handleInferenceStopped(instance: JanAssistantExtension) {
instance.isCancelled = true;
instance.controller?.abort();
}
private static async handleMessageRequest(
data: MessageRequest,
instance: JanAssistantExtension,
) {
instance.isCancelled = false;
instance.controller = new AbortController();
if (
data.model?.engine !== InferenceEngine.tool_retrieval_enabled ||
!data.messages ||
!data.thread?.assistants[0]?.tools
) {
return;
}
const latestMessage = data.messages[data.messages.length - 1];
// Ingest the document if needed
if (
latestMessage &&
latestMessage.content &&
typeof latestMessage.content !== "string"
) {
const docFile = latestMessage.content[1]?.doc_url?.url;
if (docFile) {
await executeOnMain(
NODE,
"toolRetrievalIngestNewDocument",
docFile,
data.model?.proxyEngine,
);
}
}
// Load the agent when the thread changes
if (instance.retrievalThreadId !== data.threadId) {
await executeOnMain(NODE, "toolRetrievalLoadThreadMemory", data.threadId);
instance.retrievalThreadId = data.threadId;
// Update the text splitter
await executeOnMain(
NODE,
"toolRetrievalUpdateTextSplitter",
data.thread.assistants[0].tools[0]?.settings?.chunk_size ?? 4000,
data.thread.assistants[0].tools[0]?.settings?.chunk_overlap ?? 200,
);
}
if (latestMessage.content) {
const prompt =
typeof latestMessage.content === "string"
? latestMessage.content
: latestMessage.content[0].text;
// Retrieve the result
console.debug("toolRetrievalQuery", latestMessage.content);
const retrievalResult = await executeOnMain(
NODE,
"toolRetrievalQueryResult",
prompt,
);
// Update the message content
// Using the retrieval template with the result and query
if (data.thread?.assistants[0].tools)
data.messages[data.messages.length - 1].content =
data.thread.assistants[0].tools[0].settings?.retrieval_template
?.replace("{CONTEXT}", retrievalResult)
.replace("{QUESTION}", prompt);
}
// Filter out all the messages that are not text
data.messages = data.messages.map((message) => {
if (
message.content &&
typeof message.content !== "string" &&
(message.content.length ?? 0) > 0
) {
return {
...message,
content: [message.content[0]],
};
}
return message;
});
// Reroute the result to the inference engine
const output = {
...data,
model: {
...data.model,
engine: data.model.proxyEngine,
},
};
events.emit(MessageEvent.OnMessageSent, output);
}
/**
@ -19,15 +154,21 @@ export default class JanAssistantExtension extends AssistantExtension {
onUnload(): void {}
async createAssistant(assistant: Assistant): Promise<void> {
const assistantDir = join(JanAssistantExtension._homeDir, assistant.id);
const assistantDir = await joinPath([
JanAssistantExtension._homeDir,
assistant.id,
]);
if (!(await fs.existsSync(assistantDir))) await fs.mkdirSync(assistantDir);
// store the assistant metadata json
const assistantMetadataPath = join(assistantDir, "assistant.json");
const assistantMetadataPath = await joinPath([
assistantDir,
"assistant.json",
]);
try {
await fs.writeFileSync(
assistantMetadataPath,
JSON.stringify(assistant, null, 2)
JSON.stringify(assistant, null, 2),
);
} catch (err) {
console.error(err);
@ -39,14 +180,17 @@ export default class JanAssistantExtension extends AssistantExtension {
// get all the assistant metadata json
const results: Assistant[] = [];
const allFileName: string[] = await fs.readdirSync(
JanAssistantExtension._homeDir
JanAssistantExtension._homeDir,
);
for (const fileName of allFileName) {
const filePath = join(JanAssistantExtension._homeDir, fileName);
const filePath = await joinPath([
JanAssistantExtension._homeDir,
fileName,
]);
if (filePath.includes(".DS_Store")) continue;
const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter(
(file: string) => file === "assistant.json"
(file: string) => file === "assistant.json",
);
if (jsonFiles.length !== 1) {
@ -55,8 +199,8 @@ export default class JanAssistantExtension extends AssistantExtension {
}
const content = await fs.readFileSync(
join(filePath, jsonFiles[0]),
"utf-8"
await joinPath([filePath, jsonFiles[0]]),
"utf-8",
);
const assistant: Assistant =
typeof content === "object" ? content : JSON.parse(content);
@ -73,7 +217,10 @@ export default class JanAssistantExtension extends AssistantExtension {
}
// remove the directory
const assistantDir = join(JanAssistantExtension._homeDir, assistant.id);
const assistantDir = await joinPath([
JanAssistantExtension._homeDir,
assistant.id,
]);
await fs.rmdirSync(assistantDir);
return Promise.resolve();
}
@ -89,7 +236,24 @@ export default class JanAssistantExtension extends AssistantExtension {
description: "A default assistant that can use all downloaded models",
model: "*",
instructions: "",
tools: undefined,
tools: [
{
type: "retrieval",
enabled: false,
settings: {
top_k: 2,
chunk_size: 1024,
chunk_overlap: 64,
retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
CONTEXT: {CONTEXT}
----------------
QUESTION: {QUESTION}
----------------
Helpful Answer:`,
},
},
],
file_ids: [],
metadata: undefined,
};
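For reference, the {CONTEXT}/{QUESTION} placeholders in the retrieval_template above are plain string substitutions performed in handleMessageRequest before the message is re-emitted. A minimal sketch (template trimmed, values illustrative):

function fillRetrievalTemplate(
  template: string,
  retrievalResult: string,
  prompt: string,
): string {
  // same two replacements handleMessageRequest applies
  return template
    .replace("{CONTEXT}", retrievalResult)
    .replace("{QUESTION}", prompt);
}

// fillRetrievalTemplate(
//   "CONTEXT: {CONTEXT}\nQUESTION: {QUESTION}",
//   "...top-k chunks from the ingested PDF...",
//   "What is the warranty period?",
// )
// -> "CONTEXT: ...top-k chunks...\nQUESTION: What is the warranty period?"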

View File

@ -0,0 +1,13 @@
import fs from "fs";
import path from "path";
import { getJanDataFolderPath } from "@janhq/core/node";
// Security: do not send engine settings over requests;
// read them from disk manually instead
export const readEmbeddingEngine = (engineName: string) => {
const engineSettings = fs.readFileSync(
path.join(getJanDataFolderPath(), "engines", `${engineName}.json`),
"utf-8",
);
return JSON.parse(engineSettings);
};
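A usage sketch of the helper, assuming the standard layout of <jan data folder>/engines/openai.json with an api_key field (as consumed by updateEmbeddingEngine below):

import { readEmbeddingEngine } from "./engine";

// Reads <jan data folder>/engines/openai.json from disk rather than
// accepting the settings over a request
const settings = readEmbeddingEngine("openai");
if (typeof settings.api_key !== "string") {
  throw new Error("openai.json is missing an api_key");
}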

View File

@ -0,0 +1,39 @@
import { getJanDataFolderPath, normalizeFilePath } from "@janhq/core/node";
import { Retrieval } from "./tools/retrieval";
import path from "path";
const retrieval = new Retrieval();
export async function toolRetrievalUpdateTextSplitter(
chunkSize: number,
chunkOverlap: number,
) {
retrieval.updateTextSplitter(chunkSize, chunkOverlap);
return Promise.resolve();
}
export async function toolRetrievalIngestNewDocument(
file: string,
engine: string,
) {
const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file));
const threadPath = path.dirname(filePath.replace("files", ""));
retrieval.updateEmbeddingEngine(engine);
await retrieval.ingestAgentKnowledge(filePath, `${threadPath}/memory`);
return Promise.resolve();
}
export async function toolRetrievalLoadThreadMemory(threadId: string) {
try {
await retrieval.loadRetrievalAgent(
path.join(getJanDataFolderPath(), "threads", threadId, "memory"),
);
return Promise.resolve();
} catch (err) {
console.debug(err);
}
}
export async function toolRetrievalQueryResult(query: string) {
const res = await retrieval.generateResult(query);
return Promise.resolve(res);
}

View File

@ -0,0 +1,79 @@
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { formatDocumentsAsString } from "langchain/util/document";
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
import { HNSWLib } from "langchain/vectorstores/hnswlib";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { readEmbeddingEngine } from "../../engine";
export class Retrieval {
public chunkSize: number = 100;
public chunkOverlap?: number = 0;
private retriever: any;
private embeddingModel?: OpenAIEmbeddings = undefined;
private textSplitter?: RecursiveCharacterTextSplitter;
constructor(chunkSize: number = 4000, chunkOverlap: number = 200) {
this.updateTextSplitter(chunkSize, chunkOverlap);
}
public updateTextSplitter(chunkSize: number, chunkOverlap: number): void {
this.chunkSize = chunkSize;
this.chunkOverlap = chunkOverlap;
this.textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: chunkSize,
chunkOverlap: chunkOverlap,
});
}
public updateEmbeddingEngine(engine: string): void {
// Engine settings are not compatible with the current embedding model params,
// so branch per engine manually for now
const settings = readEmbeddingEngine(engine);
if (engine === "nitro") {
this.embeddingModel = new OpenAIEmbeddings(
{ openAIApiKey: "nitro-embedding" },
{ basePath: "http://127.0.0.1:3928/v1" }
);
} else {
// Fallback to OpenAI Settings
this.embeddingModel = new OpenAIEmbeddings({
configuration: {
apiKey: settings.api_key,
},
});
}
}
public ingestAgentKnowledge = async (
filePath: string,
memoryPath: string
): Promise<any> => {
const loader = new PDFLoader(filePath, {
splitPages: true,
});
if (!this.embeddingModel) return Promise.reject();
const doc = await loader.load();
const docs = await this.textSplitter!.splitDocuments(doc);
const vectorStore = await HNSWLib.fromDocuments(docs, this.embeddingModel);
return vectorStore.save(memoryPath);
};
public loadRetrievalAgent = async (memoryPath: string): Promise<void> => {
if (!this.embeddingModel) return Promise.reject();
const vectorStore = await HNSWLib.load(memoryPath, this.embeddingModel);
this.retriever = vectorStore.asRetriever(2);
return Promise.resolve();
};
public generateResult = async (query: string): Promise<string> => {
if (!this.retriever) {
return Promise.resolve(" ");
}
const relevantDocs = await this.retriever.getRelevantDocuments(query);
const serializedDoc = formatDocumentsAsString(relevantDocs);
return Promise.resolve(serializedDoc);
};
}
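Taken together, the node entry point above drives this class in a fixed order: pick an embedding engine, ingest a document into a per-thread memory folder, load that memory, then query it. A minimal end-to-end sketch (paths illustrative; the real ones are derived from getJanDataFolderPath(), and the nitro branch assumes engines/nitro.json exists in the Jan data folder):

import { Retrieval } from "./tools/retrieval";

async function demo(): Promise<void> {
  const retrieval = new Retrieval(1024, 64); // chunk_size / chunk_overlap, as in the default tool settings
  retrieval.updateEmbeddingEngine("nitro"); // embeds locally via http://127.0.0.1:3928/v1
  await retrieval.ingestAgentKnowledge("/tmp/manual.pdf", "/tmp/thread-1/memory");
  await retrieval.loadRetrievalAgent("/tmp/thread-1/memory");
  const context = await retrieval.generateResult("What does chapter 2 cover?");
  console.log(context); // serialized top-2 chunks, or " " if nothing was loaded
}

demo().catch(console.error);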

View File

@ -1,14 +1,20 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"target": "es5",
"module": "ES2020",
"lib": ["es2015", "es2016", "es2017", "dom"],
"strict": true,
"sourceMap": true,
"declaration": true,
"allowSyntheticDefaultImports": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"declarationDir": "dist/types",
"outDir": "dist",
"importHelpers": true,
"typeRoots": ["node_modules/@types"],
"skipLibCheck": true,
"rootDir": "./src"
},
"include": ["./src"]
"include": ["src"],
}

View File

@ -1,38 +0,0 @@
const path = require("path");
const webpack = require("webpack");
const packageJson = require("./package.json");
module.exports = {
experiments: { outputModule: true },
entry: "./src/index.ts", // Adjust the entry point to match your project's main file
mode: "production",
module: {
rules: [
{
test: /\.tsx?$/,
use: "ts-loader",
exclude: /node_modules/,
},
],
},
output: {
filename: "index.js", // Adjust the output file name as needed
path: path.resolve(__dirname, "dist"),
library: { type: "module" }, // Specify ESM output format
},
plugins: [
new webpack.DefinePlugin({
MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
}),
],
resolve: {
extensions: [".ts", ".js"],
fallback: {
path: require.resolve("path-browserify"),
},
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
};

View File

@ -4,15 +4,14 @@ import {
ConversationalExtension,
Thread,
ThreadMessage,
events,
} from '@janhq/core'
/**
* JSONConversationalExtension is a ConversationalExtension implementation that provides
* functionality for managing threads.
*/
export default class JSONConversationalExtension
extends ConversationalExtension
{
export default class JSONConversationalExtension extends ConversationalExtension {
private static readonly _homeDir = 'file://threads'
private static readonly _threadInfoFileName = 'thread.json'
private static readonly _threadMessagesFileName = 'messages.jsonl'
@ -119,6 +118,33 @@ export default class JSONConversationalExtension
])
if (!(await fs.existsSync(threadDirPath)))
await fs.mkdirSync(threadDirPath)
if (message.content[0]?.type === 'image') {
const filesPath = await joinPath([threadDirPath, 'files'])
if (!(await fs.existsSync(filesPath))) await fs.mkdirSync(filesPath)
const imagePath = await joinPath([filesPath, `${message.id}.png`])
const base64 = message.content[0].text.annotations[0]
await this.storeImage(base64, imagePath)
if ((await fs.existsSync(imagePath)) && message.content?.length) {
// Use file path instead of blob
message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.png`
}
}
if (message.content[0]?.type === 'pdf') {
const filesPath = await joinPath([threadDirPath, 'files'])
if (!(await fs.existsSync(filesPath))) await fs.mkdirSync(filesPath)
const filePath = await joinPath([filesPath, `${message.id}.pdf`])
const blob = message.content[0].text.annotations[0]
await this.storeFile(blob, filePath)
if ((await fs.existsSync(filePath)) && message.content?.length) {
// Use file path instead of blob
message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.pdf`
}
}
await fs.appendFileSync(threadMessagePath, JSON.stringify(message) + '\n')
Promise.resolve()
} catch (err) {
@ -126,6 +152,25 @@ export default class JSONConversationalExtension
}
}
async storeImage(base64: string, filePath: string): Promise<void> {
const base64Data = base64.replace(/^data:image\/\w+;base64,/, '')
try {
await fs.writeBlob(filePath, base64Data)
} catch (err) {
console.error(err)
}
}
async storeFile(base64: string, filePath: string): Promise<void> {
const base64Data = base64.replace(/^data:application\/pdf;base64,/, '')
try {
await fs.writeBlob(filePath, base64Data)
} catch (err) {
console.error(err)
}
}
async writeMessages(
threadId: string,
messages: ThreadMessage[]
@ -229,7 +274,11 @@ export default class JSONConversationalExtension
const messages: ThreadMessage[] = []
result.forEach((line: string) => {
messages.push(JSON.parse(line) as ThreadMessage)
try {
messages.push(JSON.parse(line) as ThreadMessage)
} catch (err) {
console.error(err)
}
})
return messages
} catch (err) {

View File

@ -1 +1 @@
0.2.12
0.3.5

View File

@ -35,11 +35,12 @@
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"run-script-os": "^1.1.6",
"typescript": "^5.3.3"
"typescript": "^5.2.2"
},
"dependencies": {
"@janhq/core": "file:../../core",
"@rollup/plugin-replace": "^5.0.5",
"@types/os-utils": "^0.0.4",
"fetch-retry": "^5.0.6",
"path-browserify": "^1.0.1",
"rxjs": "^7.8.1",

View File

@ -2,22 +2,6 @@ declare const NODE: string;
declare const INFERENCE_URL: string;
declare const TROUBLESHOOTING_URL: string;
/**
* The parameters for the initModel function.
* @property settings - The settings for the machine learning model.
* @property settings.ctx_len - The context length.
* @property settings.ngl - The number of generated tokens.
* @property settings.cont_batching - Whether to use continuous batching.
* @property settings.embedding - Whether to use embedding.
*/
interface EngineSettings {
ctx_len: number;
ngl: number;
cpu_threads: number;
cont_batching: boolean;
embedding: boolean;
}
/**
* The response from the initModel function.
* @property error - An error message if the model fails to load.
@ -26,8 +10,3 @@ interface ModelOperationResponse {
error?: any;
modelFile?: string;
}
interface ResourcesInfo {
numCpuPhysicalCore: number;
memAvailable: number;
}

View File

@ -24,6 +24,7 @@ import {
MessageEvent,
ModelEvent,
InferenceEvent,
ModelSettingParams,
} from "@janhq/core";
import { requestInference } from "./helpers/sse";
import { ulid } from "ulid";
@ -45,12 +46,12 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
private _currentModel: Model | undefined;
private _engineSettings: EngineSettings = {
private _engineSettings: ModelSettingParams = {
ctx_len: 2048,
ngl: 100,
cpu_threads: 1,
cont_batching: false,
embedding: false,
embedding: true,
};
controller = new AbortController();
@ -83,19 +84,19 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
// Events subscription
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.onMessageRequest(data)
this.onMessageRequest(data),
);
events.on(ModelEvent.OnModelInit, (model: Model) =>
this.onModelInit(model)
this.onModelInit(model),
);
events.on(ModelEvent.OnModelStop, (model: Model) =>
this.onModelStop(model)
this.onModelStop(model),
);
events.on(InferenceEvent.OnInferenceStopped, () =>
this.onInferenceStopped()
this.onInferenceStopped(),
);
// Attempt to fetch nvidia info
@ -120,7 +121,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
} else {
await fs.writeFileSync(
engineFile,
JSON.stringify(this._engineSettings, null, 2)
JSON.stringify(this._engineSettings, null, 2),
);
}
} catch (err) {
@ -133,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
const modelFullPath = await joinPath(["models", model.id]);
this._currentModel = model;
const nitroInitResult = await executeOnMain(NODE, "runModel", {
modelFullPath,
model,
@ -143,12 +145,11 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
return;
}
this._currentModel = model;
events.emit(ModelEvent.OnModelReady, model);
this.getNitroProcesHealthIntervalId = setInterval(
() => this.periodicallyGetNitroHealth(),
JanInferenceNitroExtension._intervalHealthCheck
JanInferenceNitroExtension._intervalHealthCheck,
);
}
@ -225,6 +226,9 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
*/
private async onMessageRequest(data: MessageRequest) {
if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
console.log(
`Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}`
);
return;
}

View File

@ -3,11 +3,19 @@ import path from "path";
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
import tcpPortUsed from "tcp-port-used";
import fetchRT from "fetch-retry";
import { log, getJanDataFolderPath } from "@janhq/core/node";
import {
log,
getJanDataFolderPath,
getSystemResourceInfo,
} from "@janhq/core/node";
import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
import {
Model,
InferenceEngine,
ModelSettingParams,
PromptTemplate,
} from "@janhq/core";
import { executableNitroFile } from "./execute";
import { physicalCpuCount } from "./utils";
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch);
@ -19,25 +27,6 @@ interface ModelInitOptions {
modelFullPath: string;
model: Model;
}
/**
* The response object of Prompt Template parsing.
*/
interface PromptTemplate {
system_prompt?: string;
ai_prompt?: string;
user_prompt?: string;
error?: string;
}
/**
* Model setting args for Nitro model load.
*/
interface ModelSettingArgs extends ModelSettingParams {
llama_model_path: string;
cpu_threads: number;
}
// The PORT to use for the Nitro subprocess
const PORT = 3928;
// The HOST address to use for the Nitro subprocess
@ -60,7 +49,7 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined;
// The current model file url
let currentModelFile: string = "";
// The current model settings
let currentSettings: ModelSettingArgs | undefined = undefined;
let currentSettings: ModelSettingParams | undefined = undefined;
/**
* Stops a Nitro subprocess.
@ -78,7 +67,7 @@ function stopModel(): Promise<void> {
* TODO: Should pass the absolute path of the model file instead of just the name, so we can modularize module.ts into an npm package
*/
async function runModel(
wrapper: ModelInitOptions
wrapper: ModelInitOptions,
): Promise<ModelOperationResponse | void> {
if (wrapper.model.engine !== InferenceEngine.nitro) {
// Not a nitro model
@ -96,7 +85,7 @@ async function runModel(
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
);
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@ -106,7 +95,7 @@ async function runModel(
if (wrapper.model.engine !== InferenceEngine.nitro) {
return Promise.reject("Not a nitro model");
} else {
const nitroResourceProbe = await getResourcesInfo();
const nitroResourceProbe = await getSystemResourceInfo();
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
if (wrapper.model.settings.prompt_template) {
const promptTemplate = wrapper.model.settings.prompt_template;
@ -119,13 +108,20 @@ async function runModel(
wrapper.model.settings.ai_prompt = prompt.ai_prompt;
}
const modelFolderPath = path.join(janRoot, "models", wrapper.model.id);
const modelPath = wrapper.model.settings.llama_model_path
? path.join(modelFolderPath, wrapper.model.settings.llama_model_path)
: currentModelFile;
currentSettings = {
llama_model_path: currentModelFile,
...wrapper.model.settings,
llama_model_path: modelPath,
// This is critical and requires the real physical CPU core count (or performance cores)
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
...(wrapper.model.settings.mmproj && {
mmproj: path.join(modelFolderPath, wrapper.model.settings.mmproj),
}),
};
console.log(currentSettings);
return runNitroAndLoadModel();
}
}
@ -184,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const system_prompt = promptTemplate.substring(0, systemIndex);
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex
promptIndex,
);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);
// Return the split parts
@ -197,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);
// Return the split parts
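A worked example of the split promptTemplateConverter performs, using the ChatML template from the model files below; the three substrings are taken around the {system_message} and {prompt} markers:

const template =
  "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant";
const systemIndex = template.indexOf("{system_message}");
const promptIndex = template.indexOf("{prompt}");
console.log({
  system_prompt: template.slice(0, systemIndex), // "<|im_start|>system\n"
  user_prompt: template.slice(systemIndex + "{system_message}".length, promptIndex), // "<|im_end|>\n<|im_start|>user\n"
  ai_prompt: template.slice(promptIndex + "{prompt}".length), // "<|im_end|>\n<|im_start|>assistant"
});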
@ -213,6 +209,9 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
*/
function loadLLMModel(settings: any): Promise<Response> {
if (!settings?.ngl) {
settings.ngl = 100;
}
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: "POST",
@ -226,14 +225,14 @@ function loadLLMModel(settings: any): Promise<Response> {
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
res
)}`
res,
)}`,
);
return Promise.resolve(res);
})
.catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`);
return Promise.reject();
return Promise.reject(err);
});
}
@ -255,8 +254,8 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 500,
}).then(async (res: Response) => {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
res
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
res.status
)}`
);
// If the response is OK, check model_loaded status.
@ -265,9 +264,19 @@ async function validateModelStatus(): Promise<void> {
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
body
)}`
);
return Promise.resolve();
}
}
log(
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
);
return Promise.reject("Validate model status failed");
});
}
@ -308,7 +317,7 @@ function spawnNitroProcess(): Promise<any> {
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
);
subprocess = spawn(
executableOptions.executablePath,
@ -319,7 +328,7 @@ function spawnNitroProcess(): Promise<any> {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
},
);
// Handle subprocess output
@ -344,22 +353,6 @@ function spawnNitroProcess(): Promise<any> {
});
}
/**
* Get the system resources information
* TODO: Move to Core so that it can be reused
*/
function getResourcesInfo(): Promise<ResourcesInfo> {
return new Promise(async (resolve) => {
const cpu = await physicalCpuCount();
log(`[NITRO]::CPU informations - ${cpu}`);
const response: ResourcesInfo = {
numCpuPhysicalCore: cpu,
memAvailable: 0,
};
resolve(response);
});
}
/**
* Every module should have a dispose function
* This will be called when the extension is unloaded and should clean up any resources

View File

@ -1,56 +0,0 @@
import os from "os";
import childProcess from "child_process";
function exec(command: string): Promise<string> {
return new Promise((resolve, reject) => {
childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => {
if (error) {
reject(error);
} else {
resolve(stdout);
}
});
});
}
let amount: number;
const platform = os.platform();
export async function physicalCpuCount(): Promise<number> {
return new Promise((resolve, reject) => {
if (platform === "linux") {
exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
.then((output) => {
amount = parseInt(output.trim(), 10);
resolve(amount);
})
.catch(reject);
} else if (platform === "darwin") {
exec("sysctl -n hw.physicalcpu_max")
.then((output) => {
amount = parseInt(output.trim(), 10);
resolve(amount);
})
.catch(reject);
} else if (platform === "win32") {
exec("WMIC CPU Get NumberOfCores")
.then((output) => {
amount = output
.split(os.EOL)
.map((line: string) => parseInt(line))
.filter((value: number) => !isNaN(value))
.reduce((sum: number, number: number) => sum + number, 1);
resolve(amount);
})
.catch(reject);
} else {
const cores = os.cpus().filter((cpu: any, index: number) => {
const hasHyperthreading = cpu.model.includes("Intel");
const isOdd = index % 2 === 1;
return !hasHyperthreading || isOdd;
});
amount = cores.length;
resolve(amount);
}
});
}
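This removed helper is superseded by the shared getSystemResourceInfo from @janhq/core/node, which runModel above now awaits. A minimal sketch of the replacement call (field name as used in runModel):

import { getSystemResourceInfo } from "@janhq/core/node";

async function cpuThreads(): Promise<number> {
  const info = await getSystemResourceInfo();
  // same clamp runModel applies when building currentSettings
  return Math.max(1, info.numCpuPhysicalCore);
}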

View File

@ -15,6 +15,7 @@ import {
ThreadMessage,
events,
fs,
InferenceEngine,
BaseExtension,
MessageEvent,
ModelEvent,
@ -114,7 +115,7 @@ export default class JanInferenceOpenAIExtension extends BaseExtension {
}
}
private static async handleModelInit(model: OpenAIModel) {
if (model.engine !== "openai") {
if (model.engine !== InferenceEngine.openai) {
return;
} else {
JanInferenceOpenAIExtension._currentModel = model;

View File

@ -3,13 +3,12 @@
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
"rootDir": "./src",
},
"include": ["./src"]
"include": ["./src"],
}

View File

@ -3,13 +3,12 @@
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
"rootDir": "./src",
},
"include": ["./src"]
"include": ["./src"],
}

View File

@ -1,6 +1,6 @@
{
"name": "@janhq/model-extension",
"version": "1.0.22",
"version": "1.0.23",
"description": "Model Management Extension provides model exploration and seamless downloads",
"main": "dist/index.js",
"module": "dist/module.js",

View File

@ -80,16 +80,34 @@ export default class JanModelExtension extends ModelExtension {
const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)
// try to retrieve the download file name from the source url
// if it fails, use the model ID as the file name
const extractedFileName = await model.source_url.split('/').pop()
if (model.sources.length > 1) {
// path to model binaries
for (const source of model.sources) {
let path = await joinPath([modelDirPath, this.extractFileName(source.url)])
if (source.filename) {
path = await joinPath([modelDirPath, source.filename])
}
downloadFile(source.url, path, network)
}
} else {
const fileName = this.extractFileName(model.sources[0]?.url)
const path = await joinPath([modelDirPath, fileName])
downloadFile(model.sources[0]?.url, path, network)
}
}
/**
* Try to derive the download file name from the source URL, appending the supported model extension when it is missing.
*/
private extractFileName(url: string): string {
const extractedFileName = url.split('/').pop()
const fileName = extractedFileName
.toLowerCase()
.endsWith(JanModelExtension._supportedModelFormat)
? extractedFileName
: model.id
const path = await joinPath([modelDirPath, fileName])
downloadFile(model.source_url, path, network)
: extractedFileName + JanModelExtension._supportedModelFormat
return fileName
}
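A standalone sketch of the helper's behaviour, assuming _supportedModelFormat is the ".gguf" extension implied by the surrounding checks:

const SUPPORTED_MODEL_FORMAT = ".gguf"; // assumed value of JanModelExtension._supportedModelFormat

function extractFileName(url: string): string {
  const name = url.split("/").pop() ?? ""; // guard added for the sketch
  return name.toLowerCase().endsWith(SUPPORTED_MODEL_FORMAT)
    ? name
    : name + SUPPORTED_MODEL_FORMAT;
}

// extractFileName(".../resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf")
//   -> "mistral-7b-instruct-v0.2.Q4_K_M.gguf"  (extension already present)
// extractFileName(".../resolve/main/model-q5") -> "model-q5.gguf"  (extension appended)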
/**
@ -98,6 +116,7 @@ export default class JanModelExtension extends ModelExtension {
* @returns {Promise<void>} A promise that resolves when the download has been cancelled.
*/
async cancelModelDownload(modelId: string): Promise<void> {
const model = await this.getConfiguredModels()
return abortDownload(
await joinPath([JanModelExtension._homeDir, modelId, modelId])
).then(async () => {
@ -163,15 +182,16 @@ export default class JanModelExtension extends ModelExtension {
.then((files: string[]) => {
// or model binary exists in the directory
// model binary name can match model ID or be a .gguf file and not be an incompleted model file
// TODO: Check diff between urls, filenames
return (
files.includes(modelDir) ||
files.some(
files.filter(
(file) =>
file
.toLowerCase()
.includes(JanModelExtension._supportedModelFormat) &&
!file.endsWith(JanModelExtension._incompletedModelFileName)
)
)?.length >= model.sources.length
)
})
}
@ -198,7 +218,6 @@ export default class JanModelExtension extends ModelExtension {
const readJsonPromises = allDirectories.map(async (dirName) => {
// filter out directories that don't match the selector
// read model.json
const jsonPath = await joinPath([
JanModelExtension._homeDir,
@ -226,7 +245,21 @@ export default class JanModelExtension extends ModelExtension {
const modelData = results.map((result) => {
if (result.status === 'fulfilled') {
try {
return result.value as Model
// This is to ensure backward compatibility with legacy `model.json` files that use `source_url`
const tmpModel =
typeof result.value === 'object'
? result.value
: JSON.parse(result.value)
if (tmpModel['source_url'] != null) {
tmpModel['sources'] = [
{
filename: tmpModel.id,
url: tmpModel['source_url'],
},
]
}
return tmpModel as Model
} catch {
console.debug(`Unable to parse model metadata: ${result.value}`)
return undefined

View File

@ -1,5 +1,4 @@
import { MonitoringExtension } from "@janhq/core";
import { executeOnMain } from "@janhq/core";
import { MonitoringExtension, executeOnMain } from "@janhq/core";
/**
* JanMonitoringExtension is an extension that provides system monitoring functionality.

View File

@ -0,0 +1,33 @@
{
"sources": [
{
"filename": "ggml-model-q5_k.gguf",
"url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q5_k.gguf"
},
{
"filename": "mmproj-model-f16.gguf",
"url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf"
}
],
"id": "bakllava-1",
"object": "model",
"name": "BakLlava 1",
"version": "1.0",
"description": "BakLlava 1 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Mys",
"tags": ["Vision"],
"size": 5750000000
},
"engine": "nitro"
}
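The two-entry sources array above is what the multi-file download path in JanModelExtension iterates over; settings.llama_model_path and settings.mmproj then name the main weights and the vision projector inside the model folder. A minimal sketch of the resolution step runModel performs (folder layout assumed):

import path from "path";

// Illustrative resolution of the bakllava-1 settings above
const modelFolderPath = path.join("models", "bakllava-1");
const settings = {
  llama_model_path: "ggml-model-q5_k.gguf",
  mmproj: "mmproj-model-f16.gguf",
};
const modelPath = path.join(modelFolderPath, settings.llama_model_path);
const mmprojPath = path.join(modelFolderPath, settings.mmproj);
// -> models/bakllava-1/ggml-model-q5_k.gguf and models/bakllava-1/mmproj-model-f16.gguf,
//    matching what runModel builds into currentSettings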

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf",
"id": "capybara-34b",
"object": "model",
"name": "Capybara 200k 34B Q5",
"version": "1.0",
"description": "Nous Capybara 34B is a long context length model that supports 200K tokens.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "USER:\n{prompt}\nASSISTANT:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
"tags": ["34B", "Finetuned"],
"size": 24320000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "nous-capybara-34b.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf"
}
],
"id": "capybara-34b",
"object": "model",
"name": "Capybara 200k 34B Q5",
"version": "1.0",
"description": "Nous Capybara 34B is a long context length model that supports 200K tokens.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "USER:\n{prompt}\nASSISTANT:",
"llama_model_path": "nous-capybara-34b.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
"tags": ["34B", "Finetuned"],
"size": 24320000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,33 @@
{
"source_url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf",
"id": "codeninja-1.0-7b",
"object": "model",
"name": "CodeNinja 7B Q4",
"version": "1.0",
"description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Beowolx",
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
"url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf"
}
],
"id": "codeninja-1.0-7b",
"object": "model",
"name": "CodeNinja 7B Q4",
"version": "1.0",
"description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
"llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Beowolx",
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -2,7 +2,12 @@
"object": "model",
"version": 1,
"format": "gguf",
"source_url": "N/A",
"sources": [
{
"url": "N/A",
"filename": "N/A"
}
],
"id": "N/A",
"name": "N/A",
"created": 0,
@ -10,7 +15,8 @@
"settings": {
"ctx_len": 4096,
"embedding": false,
"prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:"
"prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:",
"llama_model_path": "N/A"
},
"parameters": {
"temperature": 0.7,

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf",
"id": "deepseek-coder-1.3b",
"object": "model",
"name": "Deepseek Coder 1.3B Q8",
"version": "1.0",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["Tiny", "Foundational Model"],
"size": 1430000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
"url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf"
}
],
"id": "deepseek-coder-1.3b",
"object": "model",
"name": "Deepseek Coder 1.3B Q8",
"version": "1.0",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["Tiny", "Foundational Model"],
"size": 1430000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf",
"id": "deepseek-coder-34b",
"object": "model",
"name": "Deepseek Coder 33B Q5",
"version": "1.0",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["34B", "Foundational Model"],
"size": 19940000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "deepseek-coder-33b-instruct.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf"
}
],
"id": "deepseek-coder-34b",
"object": "model",
"name": "Deepseek Coder 33B Q5",
"version": "1.0",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "deepseek-coder-33b-instruct.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["34B", "Foundational Model"],
"size": 19940000000
},
"engine": "nitro"
}

View File

@ -1,28 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf",
"id": "dolphin-2.7-mixtral-8x7b",
"object": "model",
"name": "Dolphin 8x7B Q4",
"version": "1.0",
"description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Cognitive Computations, TheBloke",
"tags": ["70B", "Finetuned"],
"size": 26440000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf"
}
],
"id": "dolphin-2.7-mixtral-8x7b",
"object": "model",
"name": "Dolphin 8x7B Q4",
"version": "1.0",
"description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Cognitive Computations, TheBloke",
"tags": ["70B", "Finetuned"],
"size": 26440000000
},
"engine": "nitro"
}

View File

@ -1,18 +1,20 @@
{
"source_url": "https://openai.com",
"id": "gpt-3.5-turbo-16k-0613",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo 16k 0613",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai",
"state": "ready"
"sources": [
{
"url": "https://openai.com"
}
],
"id": "gpt-3.5-turbo-16k-0613",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo 16k 0613",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai"
}

View File

@ -1,18 +1,20 @@
{
"source_url": "https://openai.com",
"id": "gpt-3.5-turbo",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai",
"state": "ready"
"sources": [
{
"url": "https://openai.com"
}
],
"id": "gpt-3.5-turbo",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai"
}

View File

@ -1,18 +1,20 @@
{
"source_url": "https://openai.com",
"id": "gpt-4",
"object": "model",
"name": "OpenAI GPT 4",
"version": "1.0",
"description": "OpenAI GPT 4 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai",
"state": "ready"
"sources": [
{
"url": "https://openai.com"
}
],
"id": "gpt-4",
"object": "model",
"name": "OpenAI GPT 4",
"version": "1.0",
"description": "OpenAI GPT 4 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf",
"id": "llama2-chat-70b-q4",
"object": "model",
"name": "Llama 2 Chat 70B Q4",
"version": "1.0",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
"tags": ["70B", "Foundational Model"],
"size": 43920000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "llama-2-70b-chat.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf"
}
],
"id": "llama2-chat-70b-q4",
"object": "model",
"name": "Llama 2 Chat 70B Q4",
"version": "1.0",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
"llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
"tags": ["70B", "Foundational Model"],
"size": 43920000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf",
"id": "llama2-chat-7b-q4",
"object": "model",
"name": "Llama 2 Chat 7B Q4",
"version": "1.0",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
"tags": ["7B", "Foundational Model"],
"size": 4080000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "llama-2-7b-chat.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
}
],
"id": "llama2-chat-7b-q4",
"object": "model",
"name": "Llama 2 Chat 7B Q4",
"version": "1.0",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
"llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
"tags": ["7B", "Foundational Model"],
"size": 4080000000
},
"engine": "nitro"
}

View File

@ -0,0 +1,33 @@
{
"sources": [
{
"filename": "ggml-model-q5_k.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q5_k.gguf"
},
{
"filename": "mmproj-model-f16.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/mmproj-model-f16.gguf"
}
],
"id": "llava-1.5-13b-q5",
"object": "model",
"name": "LlaVa 1.5 13B Q5 K",
"version": "1.0",
"description": "LlaVa 1.5 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Mys",
"tags": ["Vision"],
"size": 9850000000
},
"engine": "nitro"
}

View File

@ -0,0 +1,33 @@
{
"sources": [
{
"filename": "ggml-model-q5_k.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf"
},
{
"filename": "mmproj-model-f16.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf"
}
],
"id": "llava-1.5-7b-q5",
"object": "model",
"name": "LlaVa 1.5 7B Q5 K",
"version": "1.0",
"description": "LlaVa 1.5 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Mys",
"tags": ["Vision"],
"size": 5400000000
},
"engine": "nitro"
}

View File

@ -1,30 +1,35 @@
{
"source_url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"id": "mistral-ins-7b-q4",
"object": "model",
"name": "Mistral Instruct 7B Q4",
"version": "1.0",
"description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, The Bloke",
"tags": ["Featured", "7B", "Foundational Model"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
},
"engine": "nitro"
}
"sources": [
{
"filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
}
],
"id": "mistral-ins-7b-q4",
"object": "model",
"name": "Mistral Instruct 7B Q4",
"version": "1.0",
"description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, The Bloke",
"tags": ["Featured", "7B", "Foundational Model"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
},
"engine": "nitro"
}

View File

@ -1,28 +1,33 @@
{
"source_url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
"id": "mixtral-8x7b-instruct",
"object": "model",
"name": "Mixtral 8x7B Instruct Q4",
"version": "1.0",
"description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, TheBloke",
"tags": ["70B", "Foundational Model"],
"size": 26440000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
}
],
"id": "mixtral-8x7b-instruct",
"object": "model",
"name": "Mixtral 8x7B Instruct Q4",
"version": "1.0",
"description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, TheBloke",
"tags": ["70B", "Foundational Model"],
"size": 26440000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/NeverSleep/Noromaid-7b-v0.1.1-GGUF/resolve/main/Noromaid-7b-v0.1.1.q5_k_m.gguf",
"id": "noromaid-7b",
"object": "model",
"name": "Noromaid 7B Q5",
"version": "1.0",
"description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:{prompt}\n### Response:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NeverSleep",
"tags": ["7B", "Merged"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "Noromaid-7b-v0.1.1.q5_k_m.gguf",
"url": "https://huggingface.co/NeverSleep/Noromaid-7b-v0.1.1-GGUF/resolve/main/Noromaid-7b-v0.1.1.q5_k_m.gguf"
}
],
"id": "noromaid-7b",
"object": "model",
"name": "Noromaid 7B Q5",
"version": "1.0",
"description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:{prompt}\n### Response:",
"llama_model_path": "Noromaid-7b-v0.1.1.q5_k_m.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NeverSleep",
"tags": ["7B", "Merged"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -1,28 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf",
"id": "openchat-3.5-7b",
"object": "model",
"name": "Openchat-3.5 7B Q4",
"version": "1.0",
"description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Openchat",
"tags": ["Recommended", "7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "openchat-3.5-1210.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf"
}
],
"id": "openchat-3.5-7b",
"object": "model",
"name": "Openchat-3.5 7B Q4",
"version": "1.0",
"description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
"llama_model_path": "openchat-3.5-1210.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Openchat",
"tags": ["Recommended", "7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
"id": "openhermes-neural-7b",
"object": "model",
"name": "OpenHermes Neural 7B Q4",
"version": "1.0",
"description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Intel, Jan",
"tags": ["7B", "Merged", "Featured"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png"
},
"engine": "nitro"
}
"sources": [
{
"filename": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
}
],
"id": "openhermes-neural-7b",
"object": "model",
"name": "OpenHermes Neural 7B Q4",
"version": "1.0",
"description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Intel, Jan",
"tags": ["7B", "Merged", "Featured"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png"
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf",
"id": "phi-2-3b",
"object": "model",
"name": "Phi-2 3B Q8",
"version": "1.0",
"description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "Intruct:\n{prompt}\nOutput:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Microsoft",
"tags": ["3B","Foundational Model"],
"size": 2960000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "phi-2.Q8_0.gguf",
"url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf"
}
],
"id": "phi-2-3b",
"object": "model",
"name": "Phi-2 3B Q8",
"version": "1.0",
"description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "Intruct:\n{prompt}\nOutput:",
"llama_model_path": "phi-2.Q8_0.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Microsoft",
"tags": ["3B", "Foundational Model"],
"size": 2960000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf",
"id": "phind-34b",
"object": "model",
"name": "Phind 34B Q5",
"version": "1.0",
"description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Phind, The Bloke",
"tags": ["34B", "Finetuned"],
"size": 20220000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "phind-codellama-34b-v2.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf"
}
],
"id": "phind-34b",
"object": "model",
"name": "Phind 34B Q5",
"version": "1.0",
"description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
"llama_model_path": "phind-codellama-34b-v2.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Phind, The Bloke",
"tags": ["34B", "Finetuned"],
"size": 20220000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,33 @@
{
"source_url": "https://huggingface.co/janhq/Solar-10.7B-SLERP-GGUF/resolve/main/solar-10.7b-slerp.Q4_K_M.gguf",
"id": "solar-10.7b-slerp",
"object": "model",
"name": "Solar Slerp 10.7B Q4",
"version": "1.0",
"description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### User: {prompt}\n### Assistant:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["13B","Finetuned"],
"size": 6360000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "solar-10.7b-slerp.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/Solar-10.7B-SLERP-GGUF/resolve/main/solar-10.7b-slerp.Q4_K_M.gguf"
}
],
"id": "solar-10.7b-slerp",
"object": "model",
"name": "Solar Slerp 10.7B Q4",
"version": "1.0",
"description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### User: {prompt}\n### Assistant:",
"llama_model_path": "solar-10.7b-slerp.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["13B", "Finetuned"],
"size": 6360000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf",
"id": "starling-7b",
"object": "model",
"name": "Starling alpha 7B Q4",
"version": "1.0",
"description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Berkeley-nest, The Bloke",
"tags": ["7B","Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "starling-lm-7b-alpha.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf"
}
],
"id": "starling-7b",
"object": "model",
"name": "Starling alpha 7B Q4",
"version": "1.0",
"description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:",
"llama_model_path": "starling-lm-7b-alpha.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Berkeley-nest, The Bloke",
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -1,32 +1,33 @@
{
"source_url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf",
"id": "stealth-v1.2-7b",
"object": "model",
"name": "Stealth 7B Q4",
"version": "1.0",
"description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": [
"7B",
"Finetuned",
"Featured"
],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "stealth-v1.3.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf"
}
],
"id": "stealth-v1.2-7b",
"object": "model",
"name": "Stealth 7B Q4",
"version": "1.0",
"description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "stealth-v1.3.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["7B", "Finetuned", "Featured"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -1,5 +1,10 @@
{
"source_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
"sources": [
{
"filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
}
],
"id": "tinyllama-1.1b",
"object": "model",
"name": "TinyLlama Chat 1.1B Q4",
@ -7,8 +12,9 @@
"description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
"format": "gguf",
"settings": {
"ctx_len": 2048,
"prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>"
"ctx_len": 4096,
"prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
"llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
@ -20,9 +26,9 @@
"presence_penalty": 0
},
"metadata": {
"author": "TinyLlama",
"tags": ["Tiny", "Foundation Model"],
"size": 669000000
"author": "TinyLlama",
"tags": ["Tiny", "Foundation Model"],
"size": 669000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf",
"id": "trinity-v1.2-7b",
"object": "model",
"name": "Trinity-v1.2 7B Q4",
"version": "1.0",
"description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["7B", "Merged", "Featured"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
},
"engine": "nitro"
}
"sources": [
{
"filename": "trinity-v1.2.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf"
}
],
"id": "trinity-v1.2-7b",
"object": "model",
"name": "Trinity-v1.2 7B Q4",
"version": "1.0",
"description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "trinity-v1.2.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["7B", "Merged", "Featured"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
},
"engine": "nitro"
}

View File

@ -1,28 +1,33 @@
{
"source_url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf",
"id": "tulu-2-70b",
"object": "model",
"name": "Tulu 2 70B Q4",
"version": "1.0",
"description": "Tulu 70B is a strong alternative to Llama 2 70b Chat to act as helpful assistants.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|user|>\n{prompt}\n<|assistant|>"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Lizpreciatior, The Bloke",
"tags": ["70B", "Finetuned"],
"size": 41400000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "tulu-2-dpo-70b.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf"
}
],
"id": "tulu-2-70b",
"object": "model",
"name": "Tulu 2 70B Q4",
"version": "1.0",
"description": "Tulu 70B is a strong alternative to Llama 2 70b Chat to act as helpful assistants.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|user|>\n{prompt}\n<|assistant|>",
"llama_model_path": "tulu-2-dpo-70b.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Lizpreciatior, The Bloke",
"tags": ["70B", "Finetuned"],
"size": 41400000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf",
"id": "wizardcoder-13b",
"object": "model",
"name": "Wizard Coder Python 13B Q5",
"version": "1.0",
"description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "WizardLM, The Bloke",
"tags": ["Recommended", "13B", "Finetuned"],
"size": 7870000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf"
}
],
"id": "wizardcoder-13b",
"object": "model",
"name": "Wizard Coder Python 13B Q5",
"version": "1.0",
"description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "WizardLM, The Bloke",
"tags": ["Recommended", "13B", "Finetuned"],
"size": 7870000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,31 @@
{
"source_url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf",
"id": "yarn-mistral-7b",
"object": "model",
"name": "Yarn Mistral 7B Q4",
"version": "1.0",
"description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "{prompt}"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
"tags": ["7B","Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf"
}
],
"id": "yarn-mistral-7b",
"object": "model",
"name": "Yarn Mistral 7B Q4",
"version": "1.0",
"description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "{prompt}"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
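
Unlike every other entry in this release, the yarn-mistral source object above omits the "filename" field. A consumer that needs a local filename could plausibly fall back to the last path segment of the URL; a sketch under that assumption ("resolveFilename" is hypothetical, not an existing API):

// Prefer the declared filename; otherwise derive one from the URL path.
function resolveFilename(source: { filename?: string; url: string }): string {
  return source.filename ?? (new URL(source.url).pathname.split('/').pop() || 'model.gguf')
}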

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf",
"id": "yi-34b",
"object": "model",
"name": "Yi 34B Q5",
"version": "1.0",
"description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "01-ai, The Bloke",
"tags": ["34B", "Foundational Model"],
"size": 20660000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "yi-34b-chat.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf"
}
],
"id": "yi-34b",
"object": "model",
"name": "Yi 34B Q5",
"version": "1.0",
"description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "yi-34b-chat.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "01-ai, The Bloke",
"tags": ["34B", "Foundational Model"],
"size": 20660000000
},
"engine": "nitro"
}
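
Because the same hand edit is repeated across more than a dozen model.json files, a small consistency check that every "llama_model_path" matches one of the declared source filenames would catch copy-paste slips. A sketch assuming the repository's models/<id>/model.json layout ("checkModels" and its fallback behavior are assumptions, not existing tooling):

import { readdirSync, readFileSync } from 'node:fs'
import { join } from 'node:path'

interface Source {
  filename?: string
  url: string
}

// Report model.json files whose llama_model_path matches none of their sources.
function checkModels(modelsDir: string): string[] {
  const problems: string[] = []
  for (const dir of readdirSync(modelsDir)) {
    let model: { settings?: { llama_model_path?: string }; sources?: Source[] }
    try {
      model = JSON.parse(readFileSync(join(modelsDir, dir, 'model.json'), 'utf8'))
    } catch {
      continue // skip entries without a parseable model.json
    }
    const expected = model.settings?.llama_model_path
    if (!expected) continue // not every model declares a local file
    const names = (model.sources ?? []).map(
      (s) => s.filename ?? s.url.split('/').pop()
    )
    if (!names.includes(expected)) {
      problems.push(`${dir}: llama_model_path "${expected}" not found in sources`)
    }
  }
  return problems
}

console.log(checkModels('models').join('\n'))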

Some files were not shown because too many files have changed in this diff.