Merge branch 'dev' into chore/update-jan-web
This commit is contained in:
commit
3b0d4aadb1
@ -1 +1 @@
|
|||||||
1.0.7
|
1.0.8
|
||||||
|
|||||||
@ -18,6 +18,16 @@
|
|||||||
"placeholder": "4"
|
"placeholder": "4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"key": "cpu_threads",
|
||||||
|
"title": "CPU Threads",
|
||||||
|
"description": "The number of CPU threads to use (when in CPU mode)",
|
||||||
|
"controllerType": "input",
|
||||||
|
"controllerProps": {
|
||||||
|
"value": "",
|
||||||
|
"placeholder": "Number of CPU threads"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"key": "flash_attn",
|
"key": "flash_attn",
|
||||||
"title": "Flash Attention enabled",
|
"title": "Flash Attention enabled",
|
||||||
|
|||||||
@ -42,6 +42,7 @@ export enum Settings {
|
|||||||
flash_attn = 'flash_attn',
|
flash_attn = 'flash_attn',
|
||||||
cache_type = 'cache_type',
|
cache_type = 'cache_type',
|
||||||
use_mmap = 'use_mmap',
|
use_mmap = 'use_mmap',
|
||||||
|
cpu_threads = 'cpu_threads',
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -65,6 +66,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
flash_attn: boolean = true
|
flash_attn: boolean = true
|
||||||
use_mmap: boolean = true
|
use_mmap: boolean = true
|
||||||
cache_type: string = 'f16'
|
cache_type: string = 'f16'
|
||||||
|
cpu_threads?: number
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The URL for making inference requests.
|
* The URL for making inference requests.
|
||||||
@ -104,6 +106,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
|
this.flash_attn = await this.getSetting<boolean>(Settings.flash_attn, true)
|
||||||
this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
|
this.use_mmap = await this.getSetting<boolean>(Settings.use_mmap, true)
|
||||||
this.cache_type = await this.getSetting<string>(Settings.cache_type, 'f16')
|
this.cache_type = await this.getSetting<string>(Settings.cache_type, 'f16')
|
||||||
|
const threads_number = Number(
|
||||||
|
await this.getSetting<string>(Settings.cpu_threads, '')
|
||||||
|
)
|
||||||
|
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
|
||||||
|
|
||||||
this.queue.add(() => this.clean())
|
this.queue.add(() => this.clean())
|
||||||
|
|
||||||
@ -139,6 +145,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
this.cache_type = value as string
|
this.cache_type = value as string
|
||||||
} else if (key === Settings.use_mmap && typeof value === 'boolean') {
|
} else if (key === Settings.use_mmap && typeof value === 'boolean') {
|
||||||
this.use_mmap = value as boolean
|
this.use_mmap = value as boolean
|
||||||
|
} else if (key === Settings.cpu_threads && typeof value === 'string') {
|
||||||
|
const threads_number = Number(value)
|
||||||
|
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -196,6 +205,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
|||||||
flash_attn: this.flash_attn,
|
flash_attn: this.flash_attn,
|
||||||
cache_type: this.cache_type,
|
cache_type: this.cache_type,
|
||||||
use_mmap: this.use_mmap,
|
use_mmap: this.use_mmap,
|
||||||
|
...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
|
||||||
},
|
},
|
||||||
timeout: false,
|
timeout: false,
|
||||||
signal,
|
signal,
|
||||||
|
|||||||
@ -98,7 +98,7 @@ export const useCreateNewThread = () => {
|
|||||||
// Use ctx length by default
|
// Use ctx length by default
|
||||||
const overriddenParameters = {
|
const overriddenParameters = {
|
||||||
max_tokens: !isLocalEngine(defaultModel?.engine)
|
max_tokens: !isLocalEngine(defaultModel?.engine)
|
||||||
? (defaultModel?.parameters.token_limit ?? 8192)
|
? (defaultModel?.parameters.max_tokens ?? 8192)
|
||||||
: defaultContextLength,
|
: defaultContextLength,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -417,7 +417,7 @@ const Advanced = () => {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Vulkan for AMD GPU/ APU and Intel Arc GPU */}
|
{/* Vulkan for AMD GPU/ APU and Intel Arc GPU */}
|
||||||
{!isMac && gpuList.length > 0 && experimentalEnabled && (
|
{!isMac && experimentalEnabled && (
|
||||||
<div className="flex w-full flex-col items-start justify-between gap-4 border-b border-[hsla(var(--app-border))] py-4 first:pt-0 last:border-none sm:flex-row">
|
<div className="flex w-full flex-col items-start justify-between gap-4 border-b border-[hsla(var(--app-border))] py-4 first:pt-0 last:border-none sm:flex-row">
|
||||||
<div className="space-y-1">
|
<div className="space-y-1">
|
||||||
<div className="flex gap-x-2">
|
<div className="flex gap-x-2">
|
||||||
|
|||||||
@ -257,7 +257,7 @@ const ThreadRightPanel = () => {
|
|||||||
id="assistant-instructions"
|
id="assistant-instructions"
|
||||||
placeholder="Eg. You are a helpful assistant."
|
placeholder="Eg. You are a helpful assistant."
|
||||||
value={activeAssistant?.instructions ?? ''}
|
value={activeAssistant?.instructions ?? ''}
|
||||||
// autoResize
|
autoResize
|
||||||
onChange={onAssistantInstructionChanged}
|
onChange={onAssistantInstructionChanged}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user