* feat: tensorrt-llm-extension * fix: loading * feat: add download tensorrt llm runner Signed-off-by: James <james@jan.ai> * feat: update to rollupjs instead of webpack for monitoring extension Signed-off-by: James <james@jan.ai> * feat: move update nvidia info to monitor extension Signed-off-by: James <james@jan.ai> * allow download tensorrt Signed-off-by: James <james@jan.ai> * update Signed-off-by: James <james@jan.ai> * allow download tensor rt based on gpu setting Signed-off-by: James <james@jan.ai> * update downloaded models Signed-off-by: James <james@jan.ai> * feat: add extension compatibility * dynamic tensor rt engines Signed-off-by: James <james@jan.ai> * update models Signed-off-by: James <james@jan.ai> * chore: remove ts-ignore * feat: getting installation state from extension Signed-off-by: James <james@jan.ai> * chore: adding type for decompress Signed-off-by: James <james@jan.ai> * feat: update according Louis's comment Signed-off-by: James <james@jan.ai> * feat: add progress for installing extension Signed-off-by: James <james@jan.ai> * chore: remove args from extension installation * fix: model download does not work properly * fix: do not allow user to stop tensorrtllm inference * fix: extension installed style * fix: download tensorrt does not update state Signed-off-by: James <james@jan.ai> * chore: replace int4 by fl16 * feat: modal for installing extension Signed-off-by: James <james@jan.ai> * fix: start download immediately after press install Signed-off-by: James <james@jan.ai> * fix: error switching between engines * feat: rename inference provider to ai engine and refactor to core * fix: missing ulid * fix: core bundler * feat: add cancel extension installing Signed-off-by: James <james@jan.ai> * remove mocking for mac Signed-off-by: James <james@jan.ai> * fix: show models only when extension is ready * add tensorrt badge for model Signed-off-by: James <james@jan.ai> * fix: copy * fix: add compatible check (#2342) * fix: add compatible 
check Signed-off-by: James <james@jan.ai> * fix: copy * fix: font * fix: copy * fix: broken monitoring extension * chore: bump engine * fix: copy * fix: model copy * fix: copy * fix: model json --------- Signed-off-by: James <james@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: Louis <louis@jan.ai> * fix: vulkan support * fix: installation button padding * fix: empty script * fix: remove hard code string --------- Signed-off-by: James <james@jan.ai> Co-authored-by: James <james@jan.ai> Co-authored-by: NamH <NamNh0122@gmail.com>
68 lines
2.2 KiB
TypeScript
68 lines
2.2 KiB
TypeScript
import { Observable } from 'rxjs'
|
|
import { ModelRuntimeParams } from '../../../types'
|
|
/**
 * Extracts the text delta carried by one line of a server-sent-event stream.
 *
 * @param line - A single, already trimmed line of the response body.
 * @returns The delta text (possibly empty) for a `data: ` line, or `undefined`
 * when the line is not a `data: ` payload or is the `[DONE]` sentinel.
 * @throws SyntaxError when the payload after `data: ` is not valid JSON.
 */
function readDeltaFromEventLine(line: string): string | undefined {
  const dataPrefix = 'data: '
  if (!line.startsWith(dataPrefix) || line.includes('data: [DONE]')) {
    return undefined
  }
  const payload = JSON.parse(line.slice(dataPrefix.length))
  return payload.choices[0]?.delta?.content ?? ''
}

/**
 * Sends a POST request to the inference server and exposes the generated
 * response as an Observable.
 *
 * In streaming mode (the default) the Observable emits the accumulated response
 * text every time a new delta arrives; with `parameters.stream === false` it
 * emits the full message content once. In both modes it then completes.
 * Network failures, non-2xx responses and malformed payloads are delivered
 * through the Observable's error channel.
 *
 * @param inferenceUrl - The URL the request is POSTed to.
 * @param recentMessages - An array of recent messages to use as context for the inference.
 * @param model - The model id plus its runtime parameters; the parameters are
 * spread into the request body and may override the default `stream: true`.
 * @param controller - Optional AbortController whose signal is passed to `fetch`.
 * @returns An Observable that emits the generated response as a string.
 */
export function requestInference(
  inferenceUrl: string,
  recentMessages: any[],
  model: {
    id: string
    parameters: ModelRuntimeParams
  },
  controller?: AbortController
): Observable<string> {
  return new Observable((subscriber) => {
    // Streaming is on unless it is explicitly disabled. The same flag drives
    // both the Accept header and the response handling so they cannot disagree.
    const isStreaming = model.parameters.stream !== false

    const requestBody = JSON.stringify({
      messages: recentMessages,
      model: model.id,
      stream: true,
      ...model.parameters,
    })

    fetch(inferenceUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // NOTE(review): this is normally a response header; kept unchanged so
        // the request sent to the server stays the same - confirm it is needed.
        'Access-Control-Allow-Origin': '*',
        'Accept': isStreaming ? 'text/event-stream' : 'application/json',
      },
      body: requestBody,
      signal: controller?.signal,
    })
      .then(async (response) => {
        if (!response.ok) {
          // Surface HTTP failures instead of completing with no content.
          throw new Error(
            `Inference request failed: HTTP ${response.status} ${response.statusText}`.trim()
          )
        }

        if (!isStreaming) {
          const data = await response.json()
          subscriber.next(data.choices[0]?.message?.content ?? '')
        } else {
          const reader = response.body?.getReader()
          if (reader) {
            const decoder = new TextDecoder('utf-8')
            // Tail of the previous chunk that did not end with a newline yet.
            let pending = ''
            let content = ''

            const consumeLine = (rawLine: string): void => {
              const delta = readDeltaFromEventLine(rawLine.trim())
              if (delta === undefined) {
                return
              }
              content += delta
              if (content.startsWith('assistant: ')) {
                content = content.slice('assistant: '.length)
              }
              subscriber.next(content)
            }

            let chunk = await reader.read()
            while (!chunk.done) {
              // `stream: true` keeps a multi-byte character that straddles two
              // chunks intact instead of decoding each half separately.
              pending += decoder.decode(chunk.value, { stream: true })
              const lines = pending.split('\n')
              // The last element is an unfinished line (or ''); keep it for the
              // next chunk so JSON is only parsed once the line is complete.
              pending = lines.pop() ?? ''
              for (const line of lines) {
                consumeLine(line)
              }
              chunk = await reader.read()
            }

            // Flush whatever the decoder still holds and a final unterminated line.
            pending += decoder.decode()
            if (pending.trim() !== '') {
              consumeLine(pending)
            }
          }
        }
        subscriber.complete()
      })
      .catch((err) => subscriber.error(err))
  })
}
|