Louis d85d02693b
feat: Nitro-Tensorrt-LLM Extension (#2280)
* feat: tensorrt-llm-extension

* fix: loading

* feat: add download tensorrt llm runner

Signed-off-by: James <james@jan.ai>

* feat: update to rollupjs instead of webpack for monitoring extension

Signed-off-by: James <james@jan.ai>

* feat: move update nvidia info to monitor extension

Signed-off-by: James <james@jan.ai>

* allow download tensorrt

Signed-off-by: James <james@jan.ai>

* update

Signed-off-by: James <james@jan.ai>

* allow download tensor rt based on gpu setting

Signed-off-by: James <james@jan.ai>

* update downloaded models

Signed-off-by: James <james@jan.ai>

* feat: add extension compatibility

* dynamic tensor rt engines

Signed-off-by: James <james@jan.ai>

* update models

Signed-off-by: James <james@jan.ai>

* chore: remove ts-ignore

* feat: getting installation state from extension

Signed-off-by: James <james@jan.ai>

* chore: adding type for decompress

Signed-off-by: James <james@jan.ai>

* feat: update according to Louis's comment

Signed-off-by: James <james@jan.ai>

* feat: add progress for installing extension

Signed-off-by: James <james@jan.ai>

* chore: remove args from extension installation

* fix: model download does not work properly

* fix: do not allow user to stop tensorrtllm inference

* fix: extension installed style

* fix: download tensorrt does not update state

Signed-off-by: James <james@jan.ai>

* chore: replace int4 by fl16

* feat: modal for installing extension

Signed-off-by: James <james@jan.ai>

* fix: start download immediately after press install

Signed-off-by: James <james@jan.ai>

* fix: error switching between engines

* feat: rename inference provider to ai engine and refactor to core

* fix: missing ulid

* fix: core bundler

* feat: add cancel extension installing

Signed-off-by: James <james@jan.ai>

* remove mocking for mac

Signed-off-by: James <james@jan.ai>

* fix: show models only when extension is ready

* add tensorrt badge for model

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: add compatible check (#2342)

* fix: add compatible check

Signed-off-by: James <james@jan.ai>

* fix: copy

* fix: font

* fix: copy

* fix: broken monitoring extension

* chore: bump engine

* fix: copy

* fix: model copy

* fix: copy

* fix: model json

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: Louis <louis@jan.ai>

* fix: vulkan support

* fix: installation button padding

* fix: empty script

* fix: remove hard code string

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: NamH <NamNh0122@gmail.com>
2024-03-14 14:07:22 +07:00

194 lines
5.0 KiB
TypeScript

import { useCallback } from 'react'
import { Model } from '@janhq/core'
import {
Badge,
Button,
Tooltip,
TooltipArrow,
TooltipContent,
TooltipPortal,
TooltipTrigger,
} from '@janhq/uikit'
import { useAtomValue, useSetAtom } from 'jotai'
import { ChevronDownIcon } from 'lucide-react'
import { twMerge } from 'tailwind-merge'
import ModalCancelDownload from '@/containers/ModalCancelDownload'
import { MainViewState } from '@/constants/screens'
import { useCreateNewThread } from '@/hooks/useCreateNewThread'
import useDownloadModel from '@/hooks/useDownloadModel'
import { toGibibytes } from '@/utils/converter'
import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
import { assistantsAtom } from '@/helpers/atoms/Assistant.atom'
import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
import {
downloadedModelsAtom,
getDownloadingModelAtom,
} from '@/helpers/atoms/Model.atom'
import {
nvidiaTotalVramAtom,
totalRamAtom,
} from '@/helpers/atoms/SystemBar.atom'
/** Props accepted by the ExploreModelItemHeader component. */
type Props = {
// Model whose name, engine, size and optional cover are rendered in the header.
model: Model
// Click handler attached to the header's root element.
onClick: () => void
// Compared against model.id; when equal, the chevron icon is rotated 180 degrees.
open: string
}
/**
 * Picks the memory badge shown next to a model's size.
 *
 * The model is flagged as too large when its size multiplied by 1.25 reaches
 * or exceeds the available memory.
 *
 * @param size - Model size, expressed in the same unit as `ram`.
 * @param ram - Memory figure the model size is compared against.
 * @returns A danger badge reading "Not enough RAM" when the threshold is
 *   reached, otherwise a success badge reading "Recommended".
 */
const getLabel = (size: number, ram: number) => {
  const exceedsMemory = size * 1.25 >= ram

  return exceedsMemory ? (
    <Badge className="rounded-md" themes="danger">
      Not enough RAM
    </Badge>
  ) : (
    <Badge className="rounded-md" themes="success">
      Recommended
    </Badge>
  )
}
/**
 * Header row of a model entry on the explore screen.
 *
 * Renders the optional cover image, the model name with its engine badge, the
 * model size, a memory badge and an action control that depends on the model's
 * state: "Use" once downloaded, a cancel-download modal while a download is in
 * progress, and "Download" otherwise. Clicking the header invokes `onClick`.
 */
const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
  const { downloadModel } = useDownloadModel()
  const downloadingModels = useAtomValue(getDownloadingModelAtom)
  const downloadedModels = useAtomValue(downloadedModelsAtom)
  const { requestCreateNewThread } = useCreateNewThread()
  const totalRam = useAtomValue(totalRamAtom)
  const nvidiaTotalVram = useAtomValue(nvidiaTotalVramAtom)
  const setMainViewState = useSetAtom(mainViewStateAtom)

  // Default nvidia returns vram in MB, so convert it to bytes to match the
  // unit of totalRam. A value of zero falls back to totalRam.
  const vramInBytes = nvidiaTotalVram * 1024 * 1024
  const availableMemory = vramInBytes === 0 ? totalRam : vramInBytes

  const serverEnabled = useAtomValue(serverEnabledAtom)
  const assistants = useAtomValue(assistantsAtom)

  const onDownloadClick = useCallback(() => {
    downloadModel(model)
  }, [model, downloadModel])

  const isDownloaded = downloadedModels.some((md) => md.id === model.id)
  const isDownloading = downloadingModels.some((md) => md.id === model.id)

  // Creates a thread with the first assistant and this model, then switches
  // the main view to the thread screen.
  const onUseModelClick = useCallback(async () => {
    if (assistants.length === 0) {
      alert('No assistant available')
      return
    }

    await requestCreateNewThread(assistants[0], model)
    setMainViewState(MainViewState.Thread)
  }, [assistants, model, requestCreateNewThread, setMainViewState])

  // Chooses the action control; a downloaded model takes precedence over an
  // in-progress download.
  const renderAction = () => {
    if (isDownloaded) {
      return (
        <Tooltip>
          <TooltipTrigger>
            <Button
              themes="secondaryBlue"
              className="min-w-[98px]"
              onClick={onUseModelClick}
              disabled={serverEnabled}
            >
              Use
            </Button>
          </TooltipTrigger>
          {serverEnabled && (
            <TooltipPortal>
              <TooltipContent side="top">
                <span>Threads are disabled while the server is running</span>
                <TooltipArrow />
              </TooltipContent>
            </TooltipPortal>
          )}
        </Tooltip>
      )
    }

    if (isDownloading) {
      return <ModalCancelDownload model={model} />
    }

    return (
      <Button
        className="z-50"
        onClick={(e) => {
          // Keep the click from reaching the header's own onClick handler.
          e.stopPropagation()
          onDownloadClick()
        }}
      >
        Download
      </Button>
    )
  }

  const chevronClassName = twMerge(
    'h-5 w-5 flex-none text-gray-400',
    open === model.id && 'rotate-180'
  )

  return (
    <div
      className="cursor-pointer rounded-t-md bg-background"
      onClick={onClick}
    >
      {model.metadata.cover && (
        <div className="relative h-full w-full ">
          <img
            src={model.metadata.cover}
            className="h-[250px] w-full object-cover"
            alt={`Cover - ${model.id}`}
          />
        </div>
      )}
      <div className="flex items-center justify-between p-4">
        <div className="flex items-center gap-2">
          <span className="font-bold">{model.name}</span>
          <EngineBadge engine={model.engine} />
        </div>
        <div className="inline-flex items-center space-x-2">
          <span className="mr-4 font-semibold text-muted-foreground">
            {toGibibytes(model.metadata.size)}
          </span>
          {getLabel(model.metadata.size, availableMemory)}
          {renderAction()}
          <ChevronDownIcon className={chevronClassName} />
        </div>
      </div>
    </div>
  )
}
/** Props accepted by the EngineBadge component. */
type EngineBadgeProps = {
// Engine identifier of the model; EngineBadge switches on this value.
engine: string
}
/**
 * Small label naming the engine a model runs on.
 *
 * Only the 'nitro-tensorrt-llm' engine has a badge ("TensorRT-LLM"); every
 * other engine value renders nothing.
 */
const EngineBadge: React.FC<EngineBadgeProps> = ({ engine }) => {
  if (engine !== 'nitro-tensorrt-llm') {
    return null
  }

  return (
    <div className="flex items-center justify-center rounded-md bg-[#EFF6FF] px-2 py-[2px] font-semibold text-primary">
      TensorRT-LLM
    </div>
  )
}
export default ExploreModelItemHeader