chore: cortex.cpp gpu activation could cause a race condition (#4825)
This commit is contained in:
parent
2271c8d3d6
commit
7e46295af1
@ -22,22 +22,26 @@ type MessageList = {
|
||||
export default class CortexConversationalExtension extends ConversationalExtension {
|
||||
queue = new PQueue({ concurrency: 1 })
|
||||
|
||||
api?: KyInstance
|
||||
/**
|
||||
* Extended API instance for making requests to the Cortex API.
|
||||
* Get the API instance
|
||||
* @returns
|
||||
*/
|
||||
api: KyInstance
|
||||
async apiInstance(): Promise<KyInstance> {
|
||||
if(this.api) return this.api
|
||||
const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
|
||||
this.api = ky.extend({
|
||||
prefixUrl: API_URL,
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
})
|
||||
return this.api
|
||||
}
|
||||
/**
|
||||
* Called when the extension is loaded.
|
||||
*/
|
||||
async onLoad() {
|
||||
const apiKey = await window.core?.api.appToken() ?? 'cortex.cpp'
|
||||
this.api = ky.extend({
|
||||
prefixUrl: API_URL,
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
})
|
||||
this.queue.add(() => this.healthz())
|
||||
}
|
||||
|
||||
@ -51,10 +55,12 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
*/
|
||||
async listThreads(): Promise<Thread[]> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get('v1/threads?limit=-1')
|
||||
.json<ThreadList>()
|
||||
.then((e) => e.data)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.get('v1/threads?limit=-1')
|
||||
.json<ThreadList>()
|
||||
.then((e) => e.data)
|
||||
)
|
||||
) as Promise<Thread[]>
|
||||
}
|
||||
|
||||
@ -64,7 +70,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
*/
|
||||
async createThread(thread: Thread): Promise<Thread> {
|
||||
return this.queue.add(() =>
|
||||
this.api.post('v1/threads', { json: thread }).json<Thread>()
|
||||
this.apiInstance().then((api) =>
|
||||
api.post('v1/threads', { json: thread }).json<Thread>()
|
||||
)
|
||||
) as Promise<Thread>
|
||||
}
|
||||
|
||||
@ -75,7 +83,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
async modifyThread(thread: Thread): Promise<void> {
|
||||
return this.queue
|
||||
.add(() =>
|
||||
this.api.patch(`v1/threads/${thread.id}`, { json: thread })
|
||||
this.apiInstance().then((api) =>
|
||||
api.patch(`v1/threads/${thread.id}`, { json: thread })
|
||||
)
|
||||
)
|
||||
.then()
|
||||
}
|
||||
@ -86,7 +96,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
*/
|
||||
async deleteThread(threadId: string): Promise<void> {
|
||||
return this.queue
|
||||
.add(() => this.api.delete(`v1/threads/${threadId}`))
|
||||
.add(() =>
|
||||
this.apiInstance().then((api) => api.delete(`v1/threads/${threadId}`))
|
||||
)
|
||||
.then()
|
||||
}
|
||||
|
||||
@ -97,11 +109,13 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
*/
|
||||
async createMessage(message: ThreadMessage): Promise<ThreadMessage> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.post(`v1/threads/${message.thread_id}/messages`, {
|
||||
json: message,
|
||||
})
|
||||
.json<ThreadMessage>()
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.post(`v1/threads/${message.thread_id}/messages`, {
|
||||
json: message,
|
||||
})
|
||||
.json<ThreadMessage>()
|
||||
)
|
||||
) as Promise<ThreadMessage>
|
||||
}
|
||||
|
||||
@ -112,14 +126,13 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
*/
|
||||
async modifyMessage(message: ThreadMessage): Promise<ThreadMessage> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.patch(
|
||||
`v1/threads/${message.thread_id}/messages/${message.id}`,
|
||||
{
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.patch(`v1/threads/${message.thread_id}/messages/${message.id}`, {
|
||||
json: message,
|
||||
}
|
||||
)
|
||||
.json<ThreadMessage>()
|
||||
})
|
||||
.json<ThreadMessage>()
|
||||
)
|
||||
) as Promise<ThreadMessage>
|
||||
}
|
||||
|
||||
@ -132,7 +145,9 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
async deleteMessage(threadId: string, messageId: string): Promise<void> {
|
||||
return this.queue
|
||||
.add(() =>
|
||||
this.api.delete(`v1/threads/${threadId}/messages/${messageId}`)
|
||||
this.apiInstance().then((api) =>
|
||||
api.delete(`v1/threads/${threadId}/messages/${messageId}`)
|
||||
)
|
||||
)
|
||||
.then()
|
||||
}
|
||||
@ -144,10 +159,12 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
*/
|
||||
async listMessages(threadId: string): Promise<ThreadMessage[]> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get(`v1/threads/${threadId}/messages?order=asc&limit=-1`)
|
||||
.json<MessageList>()
|
||||
.then((e) => e.data)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.get(`v1/threads/${threadId}/messages?order=asc&limit=-1`)
|
||||
.json<MessageList>()
|
||||
.then((e) => e.data)
|
||||
)
|
||||
) as Promise<ThreadMessage[]>
|
||||
}
|
||||
|
||||
@ -159,9 +176,11 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
*/
|
||||
async getThreadAssistant(threadId: string): Promise<ThreadAssistantInfo> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get(`v1/assistants/${threadId}?limit=-1`)
|
||||
.json<ThreadAssistantInfo>()
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.get(`v1/assistants/${threadId}?limit=-1`)
|
||||
.json<ThreadAssistantInfo>()
|
||||
)
|
||||
) as Promise<ThreadAssistantInfo>
|
||||
}
|
||||
/**
|
||||
@ -175,9 +194,11 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
assistant: ThreadAssistantInfo
|
||||
): Promise<ThreadAssistantInfo> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.post(`v1/assistants/${threadId}`, { json: assistant })
|
||||
.json<ThreadAssistantInfo>()
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.post(`v1/assistants/${threadId}`, { json: assistant })
|
||||
.json<ThreadAssistantInfo>()
|
||||
)
|
||||
) as Promise<ThreadAssistantInfo>
|
||||
}
|
||||
|
||||
@ -192,9 +213,11 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
assistant: ThreadAssistantInfo
|
||||
): Promise<ThreadAssistantInfo> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.patch(`v1/assistants/${threadId}`, { json: assistant })
|
||||
.json<ThreadAssistantInfo>()
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.patch(`v1/assistants/${threadId}`, { json: assistant })
|
||||
.json<ThreadAssistantInfo>()
|
||||
)
|
||||
) as Promise<ThreadAssistantInfo>
|
||||
}
|
||||
|
||||
@ -203,10 +226,12 @@ export default class CortexConversationalExtension extends ConversationalExtensi
|
||||
* @returns
|
||||
*/
|
||||
async healthz(): Promise<void> {
|
||||
return this.api
|
||||
.get('healthz', {
|
||||
retry: { limit: 20, delay: () => 500, methods: ['get'] },
|
||||
})
|
||||
return this.apiInstance()
|
||||
.then((api) =>
|
||||
api.get('healthz', {
|
||||
retry: { limit: 20, delay: () => 500, methods: ['get'] },
|
||||
})
|
||||
)
|
||||
.then(() => {})
|
||||
}
|
||||
}
|
||||
|
||||
@ -31,15 +31,13 @@ interface ModelList {
|
||||
export default class JanEngineManagementExtension extends EngineManagementExtension {
|
||||
queue = new PQueue({ concurrency: 1 })
|
||||
|
||||
api?: KyInstance
|
||||
/**
|
||||
* Extended API instance for making requests to the Cortex API.
|
||||
* Get the API instance
|
||||
* @returns
|
||||
*/
|
||||
api: KyInstance
|
||||
/**
|
||||
* Called when the extension is loaded.
|
||||
*/
|
||||
async onLoad() {
|
||||
async apiInstance(): Promise<KyInstance> {
|
||||
if(this.api) return this.api
|
||||
const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
|
||||
this.api = ky.extend({
|
||||
prefixUrl: API_URL,
|
||||
@ -47,6 +45,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
})
|
||||
return this.api
|
||||
}
|
||||
/**
|
||||
* Called when the extension is loaded.
|
||||
*/
|
||||
async onLoad() {
|
||||
// Symlink Engines Directory
|
||||
await executeOnMain(NODE, 'symlinkEngines')
|
||||
// Run Healthcheck
|
||||
@ -71,10 +75,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
*/
|
||||
async getEngines(): Promise<Engines> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get('v1/engines')
|
||||
.json<Engines>()
|
||||
.then((e) => e)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.get('v1/engines')
|
||||
.json<Engines>()
|
||||
.then((e) => e)
|
||||
)
|
||||
) as Promise<Engines>
|
||||
}
|
||||
|
||||
@ -82,12 +88,15 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
* @returns A Promise that resolves to an object of list engines.
|
||||
*/
|
||||
async getRemoteModels(name: string): Promise<any> {
|
||||
return this.api
|
||||
.get(`v1/models/remote/${name}`)
|
||||
.json<ModelList>()
|
||||
.catch(() => ({
|
||||
data: [],
|
||||
})) as Promise<ModelList>
|
||||
return this.apiInstance().then(
|
||||
(api) =>
|
||||
api
|
||||
.get(`v1/models/remote/${name}`)
|
||||
.json<ModelList>()
|
||||
.catch(() => ({
|
||||
data: [],
|
||||
})) as Promise<ModelList>
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -96,10 +105,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
*/
|
||||
async getInstalledEngines(name: InferenceEngine): Promise<EngineVariant[]> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get(`v1/engines/${name}`)
|
||||
.json<EngineVariant[]>()
|
||||
.then((e) => e)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.get(`v1/engines/${name}`)
|
||||
.json<EngineVariant[]>()
|
||||
.then((e) => e)
|
||||
)
|
||||
) as Promise<EngineVariant[]>
|
||||
}
|
||||
|
||||
@ -115,12 +126,14 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
platform?: string
|
||||
) {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get(`v1/engines/${name}/releases/${version}`)
|
||||
.json<EngineReleased[]>()
|
||||
.then((e) =>
|
||||
platform ? e.filter((r) => r.name.includes(platform)) : e
|
||||
)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.get(`v1/engines/${name}/releases/${version}`)
|
||||
.json<EngineReleased[]>()
|
||||
.then((e) =>
|
||||
platform ? e.filter((r) => r.name.includes(platform)) : e
|
||||
)
|
||||
)
|
||||
) as Promise<EngineReleased[]>
|
||||
}
|
||||
|
||||
@ -131,12 +144,14 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
*/
|
||||
async getLatestReleasedEngine(name: InferenceEngine, platform?: string) {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get(`v1/engines/${name}/releases/latest`)
|
||||
.json<EngineReleased[]>()
|
||||
.then((e) =>
|
||||
platform ? e.filter((r) => r.name.includes(platform)) : e
|
||||
)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.get(`v1/engines/${name}/releases/latest`)
|
||||
.json<EngineReleased[]>()
|
||||
.then((e) =>
|
||||
platform ? e.filter((r) => r.name.includes(platform)) : e
|
||||
)
|
||||
)
|
||||
) as Promise<EngineReleased[]>
|
||||
}
|
||||
|
||||
@ -146,9 +161,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
*/
|
||||
async installEngine(name: string, engineConfig: EngineConfig) {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.post(`v1/engines/${name}/install`, { json: engineConfig })
|
||||
.then((e) => e)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.post(`v1/engines/${name}/install`, { json: engineConfig })
|
||||
.then((e) => e)
|
||||
)
|
||||
) as Promise<{ messages: string }>
|
||||
}
|
||||
|
||||
@ -179,9 +196,8 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM
|
||||
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.post('v1/engines', { json: engineConfig })
|
||||
.then((e) => {
|
||||
this.apiInstance().then((api) =>
|
||||
api.post('v1/engines', { json: engineConfig }).then((e) => {
|
||||
if (persistModels && engineConfig.metadata?.get_models_url) {
|
||||
// Pull /models from remote models endpoint
|
||||
return this.populateRemoteModels(engineConfig)
|
||||
@ -190,6 +206,7 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
}
|
||||
return e
|
||||
})
|
||||
)
|
||||
) as Promise<{ messages: string }>
|
||||
}
|
||||
|
||||
@ -199,9 +216,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
*/
|
||||
async uninstallEngine(name: InferenceEngine, engineConfig: EngineConfig) {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.delete(`v1/engines/${name}/install`, { json: engineConfig })
|
||||
.then((e) => e)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.delete(`v1/engines/${name}/install`, { json: engineConfig })
|
||||
.then((e) => e)
|
||||
)
|
||||
) as Promise<{ messages: string }>
|
||||
}
|
||||
|
||||
@ -210,25 +229,27 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
* @param model - Remote model object.
|
||||
*/
|
||||
async addRemoteModel(model: Model) {
|
||||
return this.queue
|
||||
.add(() =>
|
||||
this.api
|
||||
.post('v1/models/add', {
|
||||
json: {
|
||||
inference_params: {
|
||||
max_tokens: 4096,
|
||||
temperature: 0.7,
|
||||
top_p: 0.95,
|
||||
stream: true,
|
||||
frequency_penalty: 0,
|
||||
presence_penalty: 0,
|
||||
return this.queue.add(() =>
|
||||
this.apiInstance()
|
||||
.then((api) =>
|
||||
api
|
||||
.post('v1/models/add', {
|
||||
json: {
|
||||
inference_params: {
|
||||
max_tokens: 4096,
|
||||
temperature: 0.7,
|
||||
top_p: 0.95,
|
||||
stream: true,
|
||||
frequency_penalty: 0,
|
||||
presence_penalty: 0,
|
||||
},
|
||||
...model,
|
||||
},
|
||||
...model,
|
||||
},
|
||||
})
|
||||
.then((e) => e)
|
||||
)
|
||||
.then(() => {})
|
||||
})
|
||||
.then((e) => e)
|
||||
)
|
||||
.then(() => {})
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -237,10 +258,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
*/
|
||||
async getDefaultEngineVariant(name: InferenceEngine) {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get(`v1/engines/${name}/default`)
|
||||
.json<{ messages: string }>()
|
||||
.then((e) => e)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.get(`v1/engines/${name}/default`)
|
||||
.json<{ messages: string }>()
|
||||
.then((e) => e)
|
||||
)
|
||||
) as Promise<DefaultEngineVariant>
|
||||
}
|
||||
|
||||
@ -254,9 +277,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
engineConfig: EngineConfig
|
||||
) {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.post(`v1/engines/${name}/default`, { json: engineConfig })
|
||||
.then((e) => e)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.post(`v1/engines/${name}/default`, { json: engineConfig })
|
||||
.then((e) => e)
|
||||
)
|
||||
) as Promise<{ messages: string }>
|
||||
}
|
||||
|
||||
@ -265,9 +290,11 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
*/
|
||||
async updateEngine(name: InferenceEngine, engineConfig?: EngineConfig) {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.post(`v1/engines/${name}/update`, { json: engineConfig })
|
||||
.then((e) => e)
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.post(`v1/engines/${name}/update`, { json: engineConfig })
|
||||
.then((e) => e)
|
||||
)
|
||||
) as Promise<{ messages: string }>
|
||||
}
|
||||
|
||||
@ -276,10 +303,12 @@ export default class JanEngineManagementExtension extends EngineManagementExtens
|
||||
* @returns
|
||||
*/
|
||||
async healthz(): Promise<void> {
|
||||
return this.api
|
||||
.get('healthz', {
|
||||
retry: { limit: 20, delay: () => 500, methods: ['get'] },
|
||||
})
|
||||
return this.apiInstance()
|
||||
.then((api) =>
|
||||
api.get('healthz', {
|
||||
retry: { limit: 20, delay: () => 500, methods: ['get'] },
|
||||
})
|
||||
)
|
||||
.then(() => {
|
||||
this.queue.concurrency = Infinity
|
||||
})
|
||||
|
||||
@ -17,18 +17,21 @@ export default class JSONHardwareManagementExtension extends HardwareManagementE
|
||||
this.queue.add(() => this.healthz())
|
||||
}
|
||||
|
||||
api?: KyInstance
|
||||
/**
|
||||
* Get the API instance
|
||||
* @returns
|
||||
*/
|
||||
async apiInstance(): Promise<KyInstance> {
|
||||
if(this.api) return this.api
|
||||
const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
|
||||
return ky.extend({
|
||||
this.api = ky.extend({
|
||||
prefixUrl: API_URL,
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
})
|
||||
return this.api
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -75,7 +75,22 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
|
||||
abortControllers = new Map<string, AbortController>()
|
||||
|
||||
api!: KyInstance
|
||||
api?: KyInstance
|
||||
/**
|
||||
* Get the API instance
|
||||
* @returns
|
||||
*/
|
||||
async apiInstance(): Promise<KyInstance> {
|
||||
if(this.api) return this.api
|
||||
const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
|
||||
this.api = ky.extend({
|
||||
prefixUrl: CORTEX_API_URL,
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
})
|
||||
return this.api
|
||||
}
|
||||
|
||||
/**
|
||||
* Authorization headers for the API requests.
|
||||
@ -92,13 +107,6 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
*/
|
||||
async onLoad() {
|
||||
super.onLoad()
|
||||
const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
|
||||
this.api = ky.extend({
|
||||
prefixUrl: CORTEX_API_URL,
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
})
|
||||
|
||||
// Register Settings
|
||||
this.registerSettings(SETTINGS)
|
||||
@ -172,45 +180,49 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
this.abortControllers.set(model.id, controller)
|
||||
|
||||
return await this.queue.add(() =>
|
||||
this.api
|
||||
.post('v1/models/start', {
|
||||
json: {
|
||||
...extractModelLoadParams(model.settings),
|
||||
model: model.id,
|
||||
engine:
|
||||
model.engine === InferenceEngine.nitro // Legacy model cache
|
||||
? InferenceEngine.cortex_llamacpp
|
||||
: model.engine,
|
||||
cont_batching: this.cont_batching,
|
||||
n_parallel: this.n_parallel,
|
||||
caching_enabled: this.caching_enabled,
|
||||
flash_attn: this.flash_attn,
|
||||
cache_type: this.cache_type,
|
||||
use_mmap: this.use_mmap,
|
||||
...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
|
||||
},
|
||||
timeout: false,
|
||||
signal,
|
||||
})
|
||||
.json()
|
||||
.catch(async (e) => {
|
||||
throw (await e.response?.json()) ?? e
|
||||
})
|
||||
.finally(() => this.abortControllers.delete(model.id))
|
||||
.then()
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.post('v1/models/start', {
|
||||
json: {
|
||||
...extractModelLoadParams(model.settings),
|
||||
model: model.id,
|
||||
engine:
|
||||
model.engine === InferenceEngine.nitro // Legacy model cache
|
||||
? InferenceEngine.cortex_llamacpp
|
||||
: model.engine,
|
||||
cont_batching: this.cont_batching,
|
||||
n_parallel: this.n_parallel,
|
||||
caching_enabled: this.caching_enabled,
|
||||
flash_attn: this.flash_attn,
|
||||
cache_type: this.cache_type,
|
||||
use_mmap: this.use_mmap,
|
||||
...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}),
|
||||
},
|
||||
timeout: false,
|
||||
signal,
|
||||
})
|
||||
.json()
|
||||
.catch(async (e) => {
|
||||
throw (await e.response?.json()) ?? e
|
||||
})
|
||||
.finally(() => this.abortControllers.delete(model.id))
|
||||
.then()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override async unloadModel(model: Model): Promise<void> {
|
||||
return this.api
|
||||
.post('v1/models/stop', {
|
||||
json: { model: model.id },
|
||||
})
|
||||
.json()
|
||||
.finally(() => {
|
||||
this.abortControllers.get(model.id)?.abort()
|
||||
})
|
||||
.then()
|
||||
return this.apiInstance().then((api) =>
|
||||
api
|
||||
.post('v1/models/stop', {
|
||||
json: { model: model.id },
|
||||
})
|
||||
.json()
|
||||
.finally(() => {
|
||||
this.abortControllers.get(model.id)?.abort()
|
||||
})
|
||||
.then()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -218,15 +230,17 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
* @returns
|
||||
*/
|
||||
private async healthz(): Promise<void> {
|
||||
return this.api
|
||||
.get('healthz', {
|
||||
retry: {
|
||||
limit: 20,
|
||||
delay: () => 500,
|
||||
methods: ['get'],
|
||||
},
|
||||
})
|
||||
.then(() => {})
|
||||
return this.apiInstance().then((api) =>
|
||||
api
|
||||
.get('healthz', {
|
||||
retry: {
|
||||
limit: 20,
|
||||
delay: () => 500,
|
||||
methods: ['get'],
|
||||
},
|
||||
})
|
||||
.then(() => {})
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -234,13 +248,15 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
* @returns
|
||||
*/
|
||||
private async clean(): Promise<any> {
|
||||
return this.api
|
||||
.delete('processmanager/destroy', {
|
||||
timeout: 2000, // maximum 2 seconds
|
||||
retry: {
|
||||
limit: 0,
|
||||
},
|
||||
})
|
||||
return this.apiInstance()
|
||||
.then((api) =>
|
||||
api.delete('processmanager/destroy', {
|
||||
timeout: 2000, // maximum 2 seconds
|
||||
retry: {
|
||||
limit: 0,
|
||||
},
|
||||
})
|
||||
)
|
||||
.catch(() => {
|
||||
// Do nothing
|
||||
})
|
||||
|
||||
@ -33,15 +33,13 @@ type Data<T> = {
|
||||
export default class JanModelExtension extends ModelExtension {
|
||||
queue = new PQueue({ concurrency: 1 })
|
||||
|
||||
api?: KyInstance
|
||||
/**
|
||||
* Extended API instance for making requests to the Cortex API.
|
||||
* Get the API instance
|
||||
* @returns
|
||||
*/
|
||||
api: KyInstance
|
||||
/**
|
||||
* Called when the extension is loaded.
|
||||
*/
|
||||
async onLoad() {
|
||||
async apiInstance(): Promise<KyInstance> {
|
||||
if(this.api) return this.api
|
||||
const apiKey = (await window.core?.api.appToken()) ?? 'cortex.cpp'
|
||||
this.api = ky.extend({
|
||||
prefixUrl: CORTEX_API_URL,
|
||||
@ -49,6 +47,12 @@ export default class JanModelExtension extends ModelExtension {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
})
|
||||
return this.api
|
||||
}
|
||||
/**
|
||||
* Called when the extension is loaded.
|
||||
*/
|
||||
async onLoad() {
|
||||
this.queue.add(() => this.healthz())
|
||||
|
||||
this.registerSettings(SETTINGS)
|
||||
@ -94,13 +98,15 @@ export default class JanModelExtension extends ModelExtension {
|
||||
* Sending POST to /models/pull/{id} endpoint to pull the model
|
||||
*/
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.post('v1/models/pull', { json: { model, id, name }, timeout: false })
|
||||
.json()
|
||||
.catch(async (e) => {
|
||||
throw (await e.response?.json()) ?? e
|
||||
})
|
||||
.then()
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.post('v1/models/pull', { json: { model, id, name }, timeout: false })
|
||||
.json()
|
||||
.catch(async (e) => {
|
||||
throw (await e.response?.json()) ?? e
|
||||
})
|
||||
.then()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@ -115,10 +121,12 @@ export default class JanModelExtension extends ModelExtension {
|
||||
* Sending DELETE to /models/pull/{id} endpoint to cancel a model pull
|
||||
*/
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.delete('v1/models/pull', { json: { taskId: model } })
|
||||
.json()
|
||||
.then()
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.delete('v1/models/pull', { json: { taskId: model } })
|
||||
.json()
|
||||
.then()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@ -129,7 +137,11 @@ export default class JanModelExtension extends ModelExtension {
|
||||
*/
|
||||
async deleteModel(model: string): Promise<void> {
|
||||
return this.queue
|
||||
.add(() => this.api.delete(`v1/models/${model}`).json().then())
|
||||
.add(() =>
|
||||
this.apiInstance().then((api) =>
|
||||
api.delete(`v1/models/${model}`).json().then()
|
||||
)
|
||||
)
|
||||
.catch((e) => console.debug(e))
|
||||
.finally(async () => {
|
||||
// Delete legacy model files
|
||||
@ -231,13 +243,15 @@ export default class JanModelExtension extends ModelExtension {
|
||||
async updateModel(model: Partial<Model>): Promise<Model> {
|
||||
return this.queue
|
||||
.add(() =>
|
||||
this.api
|
||||
.patch(`v1/models/${model.id}`, {
|
||||
json: { ...model },
|
||||
timeout: false,
|
||||
})
|
||||
.json()
|
||||
.then()
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.patch(`v1/models/${model.id}`, {
|
||||
json: { ...model },
|
||||
timeout: false,
|
||||
})
|
||||
.json()
|
||||
.then()
|
||||
)
|
||||
)
|
||||
.then(() => this.getModel(model.id))
|
||||
}
|
||||
@ -248,10 +262,12 @@ export default class JanModelExtension extends ModelExtension {
|
||||
*/
|
||||
async getModel(model: string): Promise<Model> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get(`v1/models/${model}`)
|
||||
.json()
|
||||
.then((e) => this.transformModel(e))
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.get(`v1/models/${model}`)
|
||||
.json()
|
||||
.then((e) => this.transformModel(e))
|
||||
)
|
||||
) as Promise<Model>
|
||||
}
|
||||
|
||||
@ -267,14 +283,16 @@ export default class JanModelExtension extends ModelExtension {
|
||||
option?: OptionType
|
||||
): Promise<void> {
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.post('v1/models/import', {
|
||||
json: { model, modelPath, name, option },
|
||||
timeout: false,
|
||||
})
|
||||
.json()
|
||||
.catch((e) => console.debug(e)) // Ignore error
|
||||
.then()
|
||||
this.apiInstance().then((api) =>
|
||||
api
|
||||
.post('v1/models/import', {
|
||||
json: { model, modelPath, name, option },
|
||||
timeout: false,
|
||||
})
|
||||
.json()
|
||||
.catch((e) => console.debug(e)) // Ignore error
|
||||
.then()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@ -285,7 +303,11 @@ export default class JanModelExtension extends ModelExtension {
|
||||
*/
|
||||
async getSources(): Promise<ModelSource[]> {
|
||||
const sources = await this.queue
|
||||
.add(() => this.api.get('v1/models/sources').json<Data<ModelSource>>())
|
||||
.add(() =>
|
||||
this.apiInstance().then((api) =>
|
||||
api.get('v1/models/sources').json<Data<ModelSource>>()
|
||||
)
|
||||
)
|
||||
.then((e) => (typeof e === 'object' ? (e.data as ModelSource[]) : []))
|
||||
.catch(() => [])
|
||||
return sources.concat(
|
||||
@ -299,11 +321,13 @@ export default class JanModelExtension extends ModelExtension {
|
||||
*/
|
||||
async addSource(source: string): Promise<any> {
|
||||
return this.queue.add(() =>
|
||||
this.api.post('v1/models/sources', {
|
||||
json: {
|
||||
source,
|
||||
},
|
||||
})
|
||||
this.apiInstance().then((api) =>
|
||||
api.post('v1/models/sources', {
|
||||
json: {
|
||||
source,
|
||||
},
|
||||
})
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@ -313,12 +337,14 @@ export default class JanModelExtension extends ModelExtension {
|
||||
*/
|
||||
async deleteSource(source: string): Promise<any> {
|
||||
return this.queue.add(() =>
|
||||
this.api.delete('v1/models/sources', {
|
||||
json: {
|
||||
source,
|
||||
},
|
||||
timeout: false,
|
||||
})
|
||||
this.apiInstance().then((api) =>
|
||||
api.delete('v1/models/sources', {
|
||||
json: {
|
||||
source,
|
||||
},
|
||||
timeout: false,
|
||||
})
|
||||
)
|
||||
)
|
||||
}
|
||||
// END - Model Sources
|
||||
@ -329,7 +355,9 @@ export default class JanModelExtension extends ModelExtension {
|
||||
*/
|
||||
async isModelLoaded(model: string): Promise<boolean> {
|
||||
return this.queue
|
||||
.add(() => this.api.get(`v1/models/status/${model}`))
|
||||
.add(() =>
|
||||
this.apiInstance().then((api) => api.get(`v1/models/status/${model}`))
|
||||
)
|
||||
.then((e) => true)
|
||||
.catch(() => false)
|
||||
}
|
||||
@ -348,7 +376,11 @@ export default class JanModelExtension extends ModelExtension {
|
||||
*/
|
||||
async fetchModels(): Promise<Model[]> {
|
||||
return this.queue
|
||||
.add(() => this.api.get('v1/models?limit=-1').json<Data<Model>>())
|
||||
.add(() =>
|
||||
this.apiInstance().then((api) =>
|
||||
api.get('v1/models?limit=-1').json<Data<Model>>()
|
||||
)
|
||||
)
|
||||
.then((e) =>
|
||||
typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
|
||||
)
|
||||
@ -387,7 +419,11 @@ export default class JanModelExtension extends ModelExtension {
|
||||
[key: string]: any
|
||||
}): Promise<void> {
|
||||
return this.queue
|
||||
.add(() => this.api.patch('v1/configs', { json: body }).then(() => {}))
|
||||
.add(() =>
|
||||
this.apiInstance().then((api) =>
|
||||
api.patch('v1/configs', { json: body }).then(() => {})
|
||||
)
|
||||
)
|
||||
.catch((e) => console.debug(e))
|
||||
}
|
||||
|
||||
@ -396,14 +432,16 @@ export default class JanModelExtension extends ModelExtension {
|
||||
* @returns
|
||||
*/
|
||||
private healthz(): Promise<void> {
|
||||
return this.api
|
||||
.get('healthz', {
|
||||
retry: {
|
||||
limit: 20,
|
||||
delay: () => 500,
|
||||
methods: ['get'],
|
||||
},
|
||||
})
|
||||
return this.apiInstance()
|
||||
.then((api) =>
|
||||
api.get('healthz', {
|
||||
retry: {
|
||||
limit: 20,
|
||||
delay: () => 500,
|
||||
methods: ['get'],
|
||||
},
|
||||
})
|
||||
)
|
||||
.then(() => {
|
||||
this.queue.concurrency = Infinity
|
||||
})
|
||||
@ -416,17 +454,22 @@ export default class JanModelExtension extends ModelExtension {
|
||||
const models = await this.fetchModels()
|
||||
|
||||
return this.queue.add(() =>
|
||||
this.api
|
||||
.get('v1/models/hub?author=cortexso&tag=cortex.cpp')
|
||||
.json<Data<string>>()
|
||||
.then((e) => {
|
||||
e.data?.forEach((model) => {
|
||||
if (
|
||||
!models.some((e) => 'modelSource' in e && e.modelSource === model)
|
||||
)
|
||||
this.addSource(model).catch((e) => console.debug(e))
|
||||
})
|
||||
})
|
||||
this.apiInstance()
|
||||
.then((api) =>
|
||||
api
|
||||
.get('v1/models/hub?author=cortexso&tag=cortex.cpp')
|
||||
.json<Data<string>>()
|
||||
.then((e) => {
|
||||
e.data?.forEach((model) => {
|
||||
if (
|
||||
!models.some(
|
||||
(e) => 'modelSource' in e && e.modelSource === model
|
||||
)
|
||||
)
|
||||
this.addSource(model).catch((e) => console.debug(e))
|
||||
})
|
||||
})
|
||||
)
|
||||
.catch((e) => console.debug(e))
|
||||
)
|
||||
}
|
||||
|
||||
@ -6,13 +6,14 @@ import { useState } from 'react'
|
||||
import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd'
|
||||
|
||||
import { Progress, ScrollArea, Switch } from '@janhq/joi'
|
||||
import { useAtom, useAtomValue } from 'jotai'
|
||||
import { useAtom, useAtomValue, useSetAtom } from 'jotai'
|
||||
import { atomWithStorage } from 'jotai/utils'
|
||||
|
||||
import { ChevronDownIcon, GripVerticalIcon } from 'lucide-react'
|
||||
|
||||
import { twMerge } from 'tailwind-merge'
|
||||
|
||||
import { activeModelAtom } from '@/hooks/useActiveModel'
|
||||
import {
|
||||
useGetHardwareInfo,
|
||||
setActiveGpus,
|
||||
@ -47,6 +48,7 @@ const Hardware = () => {
|
||||
const ramUtilitized = useAtomValue(ramUtilitizedAtom)
|
||||
const showScrollBar = useAtomValue(showScrollBarAtom)
|
||||
const [gpus, setGpus] = useAtom(gpusAtom)
|
||||
const setActiveModel = useSetAtom(activeModelAtom)
|
||||
|
||||
const [orderGpus, setOrderGpus] = useAtom(orderGpusAtom)
|
||||
|
||||
@ -70,11 +72,15 @@ const Hardware = () => {
|
||||
.filter((gpu: any) => gpu.activated)
|
||||
.map((gpu: any) => Number(gpu.id))
|
||||
await setActiveGpus({ gpus: activeGpuIds })
|
||||
setActiveModel(undefined)
|
||||
mutate()
|
||||
window.location.reload()
|
||||
} catch (error) {
|
||||
console.error('Failed to update active GPUs:', error)
|
||||
}
|
||||
setIsActivatingGpu((prev) => {
|
||||
prev.delete(id)
|
||||
return new Set(prev)
|
||||
})
|
||||
}
|
||||
|
||||
const handleDragEnd = (result: any) => {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user