feat: Jan can see (#2069)
* feat: jan can see
  * feat: Add GPT-4 Vision model (Preview)
  * fix: Add visionModel as property in ModelInfo
  * fix: Fix condition to load local messages in useSetActiveThread hook
  * feat: Enable Image as input for chat
  * fix: Update model parameters in JSON files for remote GPT models
  * fix: Add thread as optional
  * fix: Add support for message as image
  * fix: Linter
  * fix: Update proxyModel to proxy_model and add textModel
  * chore: Change proxyModel to proxy_model
  * fix: Update settings with visionModel and textModel
  * fix: vision model passed through the retrieval tool
  * fix: linter
* fix: could not load image and request is not able to be sent

---------

Co-authored-by: Louis <louis@jan.ai>
parent 1584f0d21e
commit e6c10202e0
@@ -7,7 +7,7 @@ export type ModelInfo = {
   settings: ModelSettingParams
   parameters: ModelRuntimeParams
   engine?: InferenceEngine
-  proxyEngine?: InferenceEngine
+  proxy_model?: InferenceEngine
 }

 /**
@@ -93,12 +93,7 @@ export type Model = {
    */
   engine: InferenceEngine

-  proxyEngine?: InferenceEngine
-
-  /**
-   * Is multimodal or not.
-   */
-  visionModel?: boolean
+  proxy_model?: InferenceEngine
 }

 export type ModelMetadata = {
@@ -124,6 +119,8 @@ export type ModelSettingParams = {
   llama_model_path?: string
   mmproj?: string
   cont_batching?: boolean
+  vision_model?: boolean
+  text_model?: boolean
 }

 /**
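For context, these two flags let a model declare what inputs it handles. A minimal sketch of a settings object for a vision-only model (the mmproj filename is illustrative; only vision_model and text_model are introduced by this hunk):

    // Hypothetical example of ModelSettingParams for a vision-only model.
    const visionSettings: ModelSettingParams = {
      ctx_len: 4096,
      llama_model_path: 'ggml-model-q5_k.gguf',
      mmproj: 'mmproj-model-f16.gguf', // multimodal projector; filename assumed
      vision_model: true, // accepts image input
      text_model: false, // not intended for plain text chat
    }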
@@ -10,6 +10,7 @@ import {
   executeOnMain,
   AssistantExtension,
   AssistantEvent,
+  ChatCompletionMessageContentType,
 } from '@janhq/core'

 export default class JanAssistantExtension extends AssistantExtension {
@@ -86,7 +87,7 @@ export default class JanAssistantExtension extends AssistantExtension {
         NODE,
         'toolRetrievalIngestNewDocument',
         docFile,
-        data.model?.proxyEngine
+        data.model?.proxy_model
       )
     }
   } else if (
@@ -105,7 +106,7 @@ export default class JanAssistantExtension extends AssistantExtension {
       ...data,
       model: {
         ...data.model,
-        engine: data.model.proxyEngine,
+        engine: data.model.proxy_model,
       },
     }
     events.emit(MessageEvent.OnMessageSent, output)
@@ -168,7 +169,7 @@ export default class JanAssistantExtension extends AssistantExtension {
       ...data,
       model: {
         ...data.model,
-        engine: data.model.proxyEngine,
+        engine: data.model.proxy_model,
       },
     }
     events.emit(MessageEvent.OnMessageSent, output)
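Read together with the useSendChatMessage change further down, the pattern appears to be an engine swap: the sender routes a document request through the retrieval tool and stashes the real engine in proxy_model, and this extension restores it before re-emitting. A sketch under that assumption (selectedModel and data are illustrative placeholders):

    // Outbound: route through the retrieval tool, remember the real engine.
    const modelRequest = {
      ...selectedModel,
      engine: InferenceEngine.tool_retrieval_enabled,
      proxy_model: selectedModel.engine,
    }

    // Inbound (this extension): put the real engine back before re-emitting.
    const output = {
      ...data,
      model: { ...data.model, engine: data.model.proxy_model },
    }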
@@ -16,6 +16,8 @@
   "description": "BakLlava 1 can bring vision understanding to Jan",
   "format": "gguf",
   "settings": {
+    "vision_model": true,
+    "text_model": false,
     "ctx_len": 4096,
     "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
     "llama_model_path": "ggml-model-q5_k.gguf",
@@ -11,7 +11,10 @@
   "description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
   "format": "api",
   "settings": {},
-  "parameters": {},
+  "parameters": {
+    "max_tokens": 4096,
+    "temperature": 0.7
+  },
   "metadata": {
     "author": "OpenAI",
     "tags": ["General", "Big Context Length"]
@@ -11,7 +11,10 @@
   "description": "OpenAI GPT 3.5 Turbo model is extremely good",
   "format": "api",
   "settings": {},
-  "parameters": {},
+  "parameters": {
+    "max_tokens": 4096,
+    "temperature": 0.7
+  },
   "metadata": {
     "author": "OpenAI",
     "tags": ["General", "Big Context Length"]
models/gpt-4-vision-preview/model.json (new file, 26 lines)
@@ -0,0 +1,26 @@
+{
+  "sources": [
+    {
+      "url": "https://openai.com"
+    }
+  ],
+  "id": "gpt-4-vision-preview",
+  "object": "model",
+  "name": "OpenAI GPT 4 with Vision (Preview)",
+  "version": "1.0",
+  "description": "OpenAI GPT 4 with Vision model is extremely good in preview",
+  "format": "api",
+  "settings": {
+    "vision_model": true,
+    "textModel": false
+  },
+  "parameters": {
+    "max_tokens": 4096,
+    "temperature": 0.7
+  },
+  "metadata": {
+    "author": "OpenAI",
+    "tags": ["General", "Big Context Length", "Vision"]
+  },
+  "engine": "openai"
+}
@@ -11,7 +11,10 @@
   "description": "OpenAI GPT 4 model is extremely good",
   "format": "api",
   "settings": {},
-  "parameters": {},
+  "parameters": {
+    "max_tokens": 4096,
+    "temperature": 0.7
+  },
   "metadata": {
     "author": "OpenAI",
     "tags": ["General", "Big Context Length"]
@@ -12,10 +12,12 @@
   "id": "llava-1.5-13b-q5",
   "object": "model",
   "name": "LlaVa 1.5 13B Q5 K",
-  "version": "1.0",
+  "version": "1.1",
   "description": "LlaVa 1.5 can bring vision understanding to Jan",
   "format": "gguf",
   "settings": {
+    "vision_model": true,
+    "text_model": false,
     "ctx_len": 4096,
     "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
     "llama_model_path": "ggml-model-q5_k.gguf",
@@ -12,10 +12,12 @@
   "id": "llava-1.5-7b-q5",
   "object": "model",
   "name": "LlaVa 1.5 7B Q5 K",
-  "version": "1.0",
+  "version": "1.1",
   "description": "LlaVa 1.5 can bring vision understanding to Jan",
   "format": "gguf",
   "settings": {
+    "vision_model": true,
+    "text_model": false,
     "ctx_len": 4096,
     "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
     "llama_model_path": "ggml-model-q5_k.gguf",
@@ -139,11 +139,16 @@ const DropdownListSidebar = ({

       // Update model parameter to the thread file
       if (model)
-        updateModelParameter(activeThread.id, {
-          params: modelParams,
-          modelId: model.id,
-          engine: model.engine,
-        })
+        updateModelParameter(
+          activeThread.id,
+          {
+            params: modelParams,
+            modelId: model.id,
+            engine: model.engine,
+          },
+          // Overwrite the existing model parameter
+          true
+        )
     }
   },
   [
@@ -1,4 +1,9 @@
-import { openFileExplorer, joinPath, getJanDataFolderPath } from '@janhq/core'
+import {
+  openFileExplorer,
+  joinPath,
+  getJanDataFolderPath,
+  baseName,
+} from '@janhq/core'
 import { useAtomValue } from 'jotai'

 import { selectedModelAtom } from '@/containers/DropdownListSidebar'
@@ -78,6 +83,8 @@ export const usePath = () => {

     const userSpace = await getJanDataFolderPath()
     let filePath = undefined
+
+    id = await baseName(id)
     filePath = await joinPath(['threads', `${activeThread.id}/files`, `${id}`])
     if (!filePath) return
     const fullPath = await joinPath([userSpace, filePath])
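The added baseName call appears to normalize id to a bare file name before it is joined under the thread's files directory, so an id that already carries path segments does not produce a broken path. A hedged illustration (the exact behavior of @janhq/core's baseName is assumed):

    // Assumed: baseName strips any directory components.
    const id = await baseName('threads/jan_123/files/01HQX.png') // -> '01HQX.png' (assumed)
    const filePath = await joinPath(['threads', `${activeThread.id}/files`, `${id}`])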
@@ -30,7 +30,7 @@ import {
   fileUploadAtom,
 } from '@/containers/Providers/Jotai'

-import { getBase64 } from '@/utils/base64'
+import { compressImage, getBase64 } from '@/utils/base64'
 import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'

 import { loadModelErrorAtom, useActiveModel } from './useActiveModel'
@@ -169,12 +169,22 @@ export default function useSendChatMessage() {
     setCurrentPrompt('')
     setEditPrompt('')

-    const base64Blob = fileUpload[0]
-      ? await getBase64(fileUpload[0].file).then()
+    let base64Blob = fileUpload[0]
+      ? await getBase64(fileUpload[0].file)
       : undefined

+    const fileContentType = fileUpload[0]?.type
+
     const msgId = ulid()

+    const isDocumentInput = base64Blob && fileContentType === 'pdf'
+    const isImageInput = base64Blob && fileContentType === 'image'
+
+    if (isImageInput && base64Blob) {
+      // Compress image
+      base64Blob = await compressImage(base64Blob, 512)
+    }
+
     const messages: ChatCompletionMessage[] = [
       activeThread.assistants[0]?.instructions,
     ]
@@ -202,13 +212,23 @@ export default function useSendChatMessage() {
                 type: ChatCompletionMessageContentType.Text,
                 text: prompt,
               },
-              {
-                type: ChatCompletionMessageContentType.Doc,
-                doc_url: {
-                  url: `threads/${activeThread.id}/files/${msgId}.pdf`,
-                },
-              },
-            ]
+              isDocumentInput
+                ? {
+                    type: ChatCompletionMessageContentType.Doc,
+                    doc_url: {
+                      url: `threads/${activeThread.id}/files/${msgId}.pdf`,
+                    },
+                  }
+                : null,
+              isImageInput
+                ? {
+                    type: ChatCompletionMessageContentType.Image,
+                    image_url: {
+                      url: base64Blob,
+                    },
+                  }
+                : null,
+            ].filter((e) => e !== null)
           : prompt,
       } as ChatCompletionMessage,
     ])
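After this change, a message with an attachment is a content array rather than a plain string: a Text part plus either a Doc or an Image part, with the unused branch filtered out. An illustrative sketch of the image case (the prompt text is made up; field names follow this hunk):

    // Illustrative shape of the content array for an image message.
    const content = [
      { type: ChatCompletionMessageContentType.Text, text: 'What is in this picture?' },
      {
        type: ChatCompletionMessageContentType.Image,
        image_url: { url: base64Blob }, // compressed base64 data URL
      },
    ]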
@@ -226,8 +246,13 @@ export default function useSendChatMessage() {
     ) {
       modelRequest = {
         ...modelRequest,
-        engine: InferenceEngine.tool_retrieval_enabled,
-        proxyEngine: modelRequest.engine,
+        // Tool retrieval support document input only for now
+        ...(isDocumentInput
+          ? {
+              engine: InferenceEngine.tool_retrieval_enabled,
+              proxy_model: modelRequest.engine,
+            }
+          : {}),
       }
     }
     const messageRequest: MessageRequest = {
@@ -21,17 +21,17 @@ export default function useSetActiveThread() {

   const setActiveThread = async (thread: Thread) => {
     // Load local messages only if there are no messages in the state
-    if (!readyMessageThreads[thread.id]) {
-      const messages = await getLocalThreadMessage(thread.id)
-      setThreadMessage(thread.id, messages)
+    if (!readyMessageThreads[thread?.id]) {
+      const messages = await getLocalThreadMessage(thread?.id)
+      setThreadMessage(thread?.id, messages)
     }

-    setActiveThreadId(thread.id)
+    setActiveThreadId(thread?.id)
     const modelParams: ModelParams = {
-      ...thread.assistants[0]?.model?.parameters,
-      ...thread.assistants[0]?.model?.settings,
+      ...thread?.assistants[0]?.model?.parameters,
+      ...thread?.assistants[0]?.model?.settings,
     }
-    setThreadModelParams(thread.id, modelParams)
+    setThreadModelParams(thread?.id, modelParams)
   }

   return { setActiveThread }
@@ -37,7 +37,8 @@ export default function useUpdateModelParameters() {

   const updateModelParameter = async (
     threadId: string,
-    settings: UpdateModelParameter
+    settings: UpdateModelParameter,
+    overwrite: boolean = false
   ) => {
     const thread = threads.find((thread) => thread.id === threadId)
     if (!thread) {
@@ -66,8 +67,14 @@ export default function useUpdateModelParameters() {
     const runtimeParams = toRuntimeParams(updatedModelParams)
     const settingParams = toSettingParams(updatedModelParams)

-    assistant.model.parameters = runtimeParams
-    assistant.model.settings = settingParams
+    assistant.model.parameters = {
+      ...(overwrite ? {} : assistant.model.parameters),
+      ...runtimeParams,
+    }
+    assistant.model.settings = {
+      ...(overwrite ? {} : assistant.model.settings),
+      ...settingParams,
+    }
     if (selectedModel) {
       assistant.model.id = settings.modelId ?? selectedModel?.id
       assistant.model.engine = settings.engine ?? selectedModel?.engine
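The new overwrite flag changes the merge base: false (the default) layers the incoming params over what the assistant already has, while true starts from an empty object so stale keys are dropped. A small sketch with made-up values:

    const existing = { temperature: 0.7, max_tokens: 4096 } // current assistant params
    const incoming = { temperature: 0.2 }

    const merged = { ...(false ? {} : existing), ...incoming }
    // -> { temperature: 0.2, max_tokens: 4096 }

    const replaced = { ...(true ? {} : existing), ...incoming }
    // -> { temperature: 0.2 }

This matches the DropdownListSidebar hunk above, which passes true when a model is selected, presumably so parameters from the previous model do not leak into the new one.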
@@ -165,7 +165,8 @@ const ChatInput: React.FC = () => {
         if (
           fileUpload.length > 0 ||
           (activeThread?.assistants[0].tools &&
-            !activeThread?.assistants[0].tools[0]?.enabled)
+            !activeThread?.assistants[0].tools[0]?.enabled &&
+            !activeThread?.assistants[0].model.settings.vision_model)
         ) {
           e.stopPropagation()
         } else {
@@ -177,7 +178,8 @@ const ChatInput: React.FC = () => {
           <TooltipPortal>
             {fileUpload.length > 0 ||
               (activeThread?.assistants[0].tools &&
-                !activeThread?.assistants[0].tools[0]?.enabled && (
+                !activeThread?.assistants[0].tools[0]?.enabled &&
+                !activeThread?.assistants[0].model.settings.vision_model && (
                   <TooltipContent side="top" className="max-w-[154px] px-3">
                     {fileUpload.length !== 0 && (
                       <span>
@@ -206,15 +208,41 @@ const ChatInput: React.FC = () => {
             className="absolute bottom-10 right-0 w-36 cursor-pointer rounded-lg border border-border bg-background py-1 shadow"
           >
             <ul>
-              <li className="flex w-full cursor-not-allowed items-center space-x-2 px-4 py-2 text-muted-foreground opacity-50 hover:bg-secondary">
+              <li
+                className={twMerge(
+                  'flex w-full items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary',
+                  activeThread?.assistants[0].model.settings.vision_model
+                    ? 'cursor-pointer'
+                    : 'cursor-not-allowed opacity-50'
+                )}
+                onClick={() => {
+                  if (activeThread?.assistants[0].model.settings.vision_model) {
+                    imageInputRef.current?.click()
+                    setShowAttacmentMenus(false)
+                  }
+                }}
+              >
                 <ImageIcon size={16} />
                 <span className="font-medium">Image</span>
               </li>
               <li
-                className="flex w-full cursor-pointer items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary"
+                className={twMerge(
+                  'flex w-full cursor-pointer items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary',
+                  activeThread?.assistants[0].model.settings.vision_model &&
+                    activeThread?.assistants[0].model.settings.text_model ===
+                      false
+                    ? 'cursor-not-allowed opacity-50'
+                    : 'cursor-pointer'
+                )}
                 onClick={() => {
-                  fileInputRef.current?.click()
-                  setShowAttacmentMenus(false)
+                  if (
+                    !activeThread?.assistants[0].model.settings.vision_model ||
+                    activeThread?.assistants[0].model.settings.text_model !==
+                      false
+                  ) {
+                    fileInputRef.current?.click()
+                    setShowAttacmentMenus(false)
+                  }
                 }}
               >
                 <FileTextIcon size={16} />
web/screens/Chat/SimpleTextMessage/RelativeImage.tsx (new file, 30 lines)
@@ -0,0 +1,30 @@
+import { useEffect, useState } from 'react'
+
+import { getJanDataFolderPath } from '@janhq/core'
+
+export const RelativeImage = ({
+  id,
+  src,
+  onClick,
+}: {
+  id: string
+  src: string
+  onClick: () => void
+}) => {
+  const [path, setPath] = useState<string>('')
+
+  useEffect(() => {
+    getJanDataFolderPath().then((dataFolderPath) => {
+      setPath(dataFolderPath)
+    })
+  }, [])
+  return (
+    <button onClick={onClick}>
+      <img
+        className="aspect-auto h-[300px] cursor-pointer"
+        alt={id}
+        src={src.includes('files/') ? `file://${path}/${src}` : src}
+      />
+    </button>
+  )
+}
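A hedged usage sketch: a thread-relative src (anything containing 'files/') is resolved against the Jan data folder, while other URLs pass through unchanged. The values below are illustrative:

    // Renders file://<janDataFolder>/threads/jan_123/files/01HQX.png
    <RelativeImage
      id="01HQX"
      src="threads/jan_123/files/01HQX.png"
      onClick={() => openViewer()} // hypothetical handler
    />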
@@ -38,6 +38,8 @@ import EditChatInput from '../EditChatInput'
 import Icon from '../FileUploadPreview/Icon'
 import MessageToolbar from '../MessageToolbar'

+import { RelativeImage } from './RelativeImage'
+
 import {
   editMessageAtom,
   getCurrentChatMessagesAtom,
@@ -204,14 +206,16 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
           <div className={twMerge('w-full')}>
             <>
               {props.content[0]?.type === ContentType.Image && (
-                <div className="group/image relative mb-2 inline-flex overflow-hidden rounded-xl">
-                  <img
-                    className="aspect-auto h-[300px]"
-                    alt={props.content[0]?.text.name}
-                    src={props.content[0]?.text.annotations[0]}
-                    onClick={() => onViewFile(`${props.id}.png`)}
-                  />
-                  <div className="absolute left-0 top-0 z-20 hidden h-full w-full bg-black/20 group-hover/image:inline-block" />
+                <div className="group/image relative mb-2 inline-flex cursor-pointer overflow-hidden rounded-xl">
+                  <div className="left-0 top-0 z-20 h-full w-full bg-black/20 group-hover/image:inline-block">
+                    <RelativeImage
+                      src={props.content[0]?.text.annotations[0]}
+                      id={props.id}
+                      onClick={() =>
+                        onViewFile(`${props.content[0]?.text.annotations[0]}`)
+                      }
+                    />
+                  </div>
                   <Tooltip>
                     <TooltipTrigger asChild>
                       <div
@@ -7,3 +7,39 @@ export const getBase64 = async (file: File): Promise<string> =>
       resolve(baseURL as string)
     }
   })
+
+export function compressImage(
+  base64Image: string,
+  size: number
+): Promise<string> {
+  // Create a canvas element
+  const canvas = document.createElement('canvas')
+  const ctx = canvas.getContext('2d')
+
+  // Create an image object
+  const image = new Image()
+
+  // Set the image source to the base64 string
+  image.src = base64Image
+
+  return new Promise((resolve) => {
+    // Wait for the image to load
+    image.onload = () => {
+      // Set the canvas width and height to the image width and height
+      const width = Math.min(size, image.width)
+      const height = (image.height / image.width) * width
+
+      canvas.width = width
+      canvas.height = height
+
+      // Draw the image on the canvas
+      ctx?.drawImage(image, 0, 0, canvas.width, canvas.height)
+
+      // Convert the canvas to a data URL with the specified quality
+      const compressedBase64Image = canvas.toDataURL(`image/jpeg`, 1)
+
+      // Log the compressed base64 image
+      return resolve(compressedBase64Image)
+    }
+  })
+}
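A short usage sketch matching the call site in useSendChatMessage: the upload is read as a base64 data URL, then downscaled to at most 512px wide before sending (file is an illustrative File value):

    const original = await getBase64(file) // data:image/...;base64,...
    const resized = await compressImage(original, 512)
    // resized is a JPEG data URL no wider than 512px, height scaled proportionally

Note that toDataURL is called with quality 1, so the size reduction comes from downscaling rather than JPEG compression.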