diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 644c34dfb..3cbe799e2 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -7,7 +7,7 @@ export type ModelInfo = {
   settings: ModelSettingParams
   parameters: ModelRuntimeParams
   engine?: InferenceEngine
-  proxyEngine?: InferenceEngine
+  proxy_model?: InferenceEngine
 }
 
 /**
@@ -93,12 +93,7 @@ export type Model = {
    */
   engine: InferenceEngine
 
-  proxyEngine?: InferenceEngine
-
-  /**
-   * Is multimodal or not.
-   */
-  visionModel?: boolean
+  proxy_model?: InferenceEngine
 }
 
 export type ModelMetadata = {
@@ -124,6 +119,8 @@ export type ModelSettingParams = {
   llama_model_path?: string
   mmproj?: string
   cont_batching?: boolean
+  vision_model?: boolean
+  text_model?: boolean
 }
 
 /**
diff --git a/extensions/assistant-extension/src/index.ts b/extensions/assistant-extension/src/index.ts
index 0a5319c8a..a2c153295 100644
--- a/extensions/assistant-extension/src/index.ts
+++ b/extensions/assistant-extension/src/index.ts
@@ -10,6 +10,7 @@ import {
   executeOnMain,
   AssistantExtension,
   AssistantEvent,
+  ChatCompletionMessageContentType,
 } from '@janhq/core'
 
 export default class JanAssistantExtension extends AssistantExtension {
@@ -86,7 +87,7 @@ export default class JanAssistantExtension extends AssistantExtension {
         NODE,
         'toolRetrievalIngestNewDocument',
         docFile,
-        data.model?.proxyEngine
+        data.model?.proxy_model
       )
     }
   } else if (
@@ -105,7 +106,7 @@ export default class JanAssistantExtension extends AssistantExtension {
         ...data,
         model: {
           ...data.model,
-          engine: data.model.proxyEngine,
+          engine: data.model.proxy_model,
         },
       }
       events.emit(MessageEvent.OnMessageSent, output)
@@ -168,7 +169,7 @@ export default class JanAssistantExtension extends AssistantExtension {
         ...data,
         model: {
           ...data.model,
-          engine: data.model.proxyEngine,
+          engine: data.model.proxy_model,
         },
       }
       events.emit(MessageEvent.OnMessageSent, output)
diff --git a/models/bakllava-1/model.json b/models/bakllava-1/model.json
index 91f6f4136..93f87c7f4 100644
--- a/models/bakllava-1/model.json
+++ b/models/bakllava-1/model.json
@@ -16,6 +16,8 @@
   "description": "BakLlava 1 can bring vision understanding to Jan",
   "format": "gguf",
   "settings": {
+    "vision_model": true,
+    "text_model": false,
     "ctx_len": 4096,
     "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
     "llama_model_path": "ggml-model-q5_k.gguf",
diff --git a/models/gpt-3.5-turbo-16k-0613/model.json b/models/gpt-3.5-turbo-16k-0613/model.json
index aa57e1154..ba4aed14d 100644
--- a/models/gpt-3.5-turbo-16k-0613/model.json
+++ b/models/gpt-3.5-turbo-16k-0613/model.json
@@ -11,7 +11,10 @@
   "description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
   "format": "api",
   "settings": {},
-  "parameters": {},
+  "parameters": {
+    "max_tokens": 4096,
+    "temperature": 0.7
+  },
   "metadata": {
     "author": "OpenAI",
     "tags": ["General", "Big Context Length"]
diff --git a/models/gpt-3.5-turbo/model.json b/models/gpt-3.5-turbo/model.json
index 617f0d056..0359f1950 100644
--- a/models/gpt-3.5-turbo/model.json
+++ b/models/gpt-3.5-turbo/model.json
@@ -11,7 +11,10 @@
   "description": "OpenAI GPT 3.5 Turbo model is extremely good",
   "format": "api",
   "settings": {},
-  "parameters": {},
+  "parameters": {
+    "max_tokens": 4096,
+    "temperature": 0.7
+  },
   "metadata": {
     "author": "OpenAI",
     "tags": ["General", "Big Context Length"]
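Across the type and extension changes above, proxyEngine becomes proxy_model: when retrieval is enabled the request's engine is temporarily set to tool_retrieval_enabled and the real engine is parked in proxy_model, so the assistant extension can restore it before re-emitting the message. A minimal sketch of that handoff, assuming MessageRequest and InferenceEngine are exported from @janhq/core as they are used elsewhere in this patch; buildRetrievalRequest is a hypothetical helper, not part of the patch:

import { InferenceEngine, MessageRequest } from '@janhq/core'

// Hypothetical helper illustrating the proxy_model handoff.
function buildRetrievalRequest(request: MessageRequest): MessageRequest {
  const model = request.model
  if (!model) return request
  return {
    ...request,
    model: {
      ...model,
      // Route the message through the retrieval tool first...
      engine: InferenceEngine.tool_retrieval_enabled,
      // ...and remember the real engine; the assistant extension later swaps it
      // back with: model: { ...data.model, engine: data.model.proxy_model }
      proxy_model: model.engine,
    },
  }
}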
diff --git a/models/gpt-4-vision-preview/model.json b/models/gpt-4-vision-preview/model.json
new file mode 100644
index 000000000..a5febf207
--- /dev/null
+++ b/models/gpt-4-vision-preview/model.json
@@ -0,0 +1,26 @@
+{
+  "sources": [
+    {
+      "url": "https://openai.com"
+    }
+  ],
+  "id": "gpt-4-vision-preview",
+  "object": "model",
+  "name": "OpenAI GPT 4 with Vision (Preview)",
+  "version": "1.0",
+  "description": "OpenAI GPT 4 with Vision model is extremely good in preview",
+  "format": "api",
+  "settings": {
+    "vision_model": true,
+    "text_model": false
+  },
+  "parameters": {
+    "max_tokens": 4096,
+    "temperature": 0.7
+  },
+  "metadata": {
+    "author": "OpenAI",
+    "tags": ["General", "Big Context Length", "Vision"]
+  },
+  "engine": "openai"
+}
diff --git a/models/gpt-4/model.json b/models/gpt-4/model.json
index 7aa2338e3..1d99374ef 100644
--- a/models/gpt-4/model.json
+++ b/models/gpt-4/model.json
@@ -11,7 +11,10 @@
   "description": "OpenAI GPT 4 model is extremely good",
   "format": "api",
   "settings": {},
-  "parameters": {},
+  "parameters": {
+    "max_tokens": 4096,
+    "temperature": 0.7
+  },
   "metadata": {
     "author": "OpenAI",
     "tags": ["General", "Big Context Length"]
diff --git a/models/llava-1.5-13b-q5/model.json b/models/llava-1.5-13b-q5/model.json
index 027b8398f..ae17a7cb1 100644
--- a/models/llava-1.5-13b-q5/model.json
+++ b/models/llava-1.5-13b-q5/model.json
@@ -12,10 +12,12 @@
   "id": "llava-1.5-13b-q5",
   "object": "model",
   "name": "LlaVa 1.5 13B Q5 K",
-  "version": "1.0",
+  "version": "1.1",
   "description": "LlaVa 1.5 can bring vision understanding to Jan",
   "format": "gguf",
   "settings": {
+    "vision_model": true,
+    "text_model": false,
     "ctx_len": 4096,
     "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
     "llama_model_path": "ggml-model-q5_k.gguf",
diff --git a/models/llava-1.5-7b-q5/model.json b/models/llava-1.5-7b-q5/model.json
index 658b98880..1c98a519e 100644
--- a/models/llava-1.5-7b-q5/model.json
+++ b/models/llava-1.5-7b-q5/model.json
@@ -12,10 +12,12 @@
   "id": "llava-1.5-7b-q5",
   "object": "model",
   "name": "LlaVa 1.5 7B Q5 K",
-  "version": "1.0",
+  "version": "1.1",
   "description": "LlaVa 1.5 can bring vision understanding to Jan",
   "format": "gguf",
   "settings": {
+    "vision_model": true,
+    "text_model": false,
     "ctx_len": 4096,
     "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
     "llama_model_path": "ggml-model-q5_k.gguf",
diff --git a/web/containers/DropdownListSidebar/index.tsx b/web/containers/DropdownListSidebar/index.tsx
index c05d26e51..2940fbdbe 100644
--- a/web/containers/DropdownListSidebar/index.tsx
+++ b/web/containers/DropdownListSidebar/index.tsx
@@ -139,11 +139,16 @@ const DropdownListSidebar = ({
       // Update model parameter to the thread file
       if (model)
-        updateModelParameter(activeThread.id, {
-          params: modelParams,
-          modelId: model.id,
-          engine: model.engine,
-        })
+        updateModelParameter(
+          activeThread.id,
+          {
+            params: modelParams,
+            modelId: model.id,
+            engine: model.engine,
+          },
+          // Overwrite the existing model parameter
+          true
+        )
     }
   }, [
diff --git a/web/hooks/usePath.ts b/web/hooks/usePath.ts
index bc4a94d1f..545cfb6ee 100644
--- a/web/hooks/usePath.ts
+++ b/web/hooks/usePath.ts
@@ -1,4 +1,9 @@
-import { openFileExplorer, joinPath, getJanDataFolderPath } from '@janhq/core'
+import {
+  openFileExplorer,
+  joinPath,
+  getJanDataFolderPath,
+  baseName,
+} from '@janhq/core'
 import { useAtomValue } from 'jotai'
 
 import { selectedModelAtom } from '@/containers/DropdownListSidebar'
@@ -78,6 +83,8 @@ export const usePath = () => {
     const userSpace = await getJanDataFolderPath()
 
     let filePath = undefined
+
+    id = await baseName(id)
     filePath = await joinPath(['threads', `${activeThread.id}/files`, `${id}`])
     if (!filePath) return
     const fullPath = await joinPath([userSpace, filePath])
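usePath now runs the attachment id through baseName before joining it under the thread's files directory, so only the file name component of the id ends up in the path. A small sketch of the intended effect, using the same @janhq/core helpers; the example values and the helper name resolveAttachmentPath are made up:

import { baseName, joinPath } from '@janhq/core'

async function resolveAttachmentPath(threadId: string, rawId: string) {
  // Keep only the final path component, e.g. '/tmp/upload/01HV3ABC.pdf' -> '01HV3ABC.pdf'
  const id = await baseName(rawId)
  // Relative to the Jan data folder: 'threads/<threadId>/files/01HV3ABC.pdf'
  return joinPath(['threads', `${threadId}/files`, id])
}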
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index 6c8f98aab..9b3664203 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -30,7 +30,7 @@ import {
   fileUploadAtom,
 } from '@/containers/Providers/Jotai'
 
-import { getBase64 } from '@/utils/base64'
+import { compressImage, getBase64 } from '@/utils/base64'
 import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'
 
 import { loadModelErrorAtom, useActiveModel } from './useActiveModel'
@@ -169,12 +169,22 @@ export default function useSendChatMessage() {
     setCurrentPrompt('')
     setEditPrompt('')
 
-    const base64Blob = fileUpload[0]
-      ? await getBase64(fileUpload[0].file).then()
+    let base64Blob = fileUpload[0]
+      ? await getBase64(fileUpload[0].file)
       : undefined
+    const fileContentType = fileUpload[0]?.type
+    const msgId = ulid()
+    const isDocumentInput = base64Blob && fileContentType === 'pdf'
+    const isImageInput = base64Blob && fileContentType === 'image'
+
+    if (isImageInput && base64Blob) {
+      // Compress image
+      base64Blob = await compressImage(base64Blob, 512)
+    }
+
     const messages: ChatCompletionMessage[] = [
       activeThread.assistants[0]?.instructions,
     ]
@@ -202,13 +212,23 @@ export default function useSendChatMessage() {
                 type: ChatCompletionMessageContentType.Text,
                 text: prompt,
               },
-              {
-                type: ChatCompletionMessageContentType.Doc,
-                doc_url: {
-                  url: `threads/${activeThread.id}/files/${msgId}.pdf`,
-                },
-              },
-            ]
+              isDocumentInput
+                ? {
+                    type: ChatCompletionMessageContentType.Doc,
+                    doc_url: {
+                      url: `threads/${activeThread.id}/files/${msgId}.pdf`,
+                    },
+                  }
+                : null,
+              isImageInput
+                ? {
+                    type: ChatCompletionMessageContentType.Image,
+                    image_url: {
+                      url: base64Blob,
+                    },
+                  }
+                : null,
+            ].filter((e) => e !== null)
           : prompt,
       } as ChatCompletionMessage,
     ])
@@ -226,8 +246,13 @@ export default function useSendChatMessage() {
     ) {
       modelRequest = {
         ...modelRequest,
-        engine: InferenceEngine.tool_retrieval_enabled,
-        proxyEngine: modelRequest.engine,
+        // Tool retrieval support document input only for now
+        ...(isDocumentInput
+          ? {
+              engine: InferenceEngine.tool_retrieval_enabled,
+              proxy_model: modelRequest.engine,
+            }
+          : {}),
       }
     }
     const messageRequest: MessageRequest = {
diff --git a/web/hooks/useSetActiveThread.ts b/web/hooks/useSetActiveThread.ts
index 486a14d03..8e9268065 100644
--- a/web/hooks/useSetActiveThread.ts
+++ b/web/hooks/useSetActiveThread.ts
@@ -21,17 +21,17 @@ export default function useSetActiveThread() {
 
   const setActiveThread = async (thread: Thread) => {
     // Load local messages only if there are no messages in the state
-    if (!readyMessageThreads[thread.id]) {
-      const messages = await getLocalThreadMessage(thread.id)
-      setThreadMessage(thread.id, messages)
+    if (!readyMessageThreads[thread?.id]) {
+      const messages = await getLocalThreadMessage(thread?.id)
+      setThreadMessage(thread?.id, messages)
     }
 
-    setActiveThreadId(thread.id)
+    setActiveThreadId(thread?.id)
     const modelParams: ModelParams = {
-      ...thread.assistants[0]?.model?.parameters,
-      ...thread.assistants[0]?.model?.settings,
+      ...thread?.assistants[0]?.model?.parameters,
+      ...thread?.assistants[0]?.model?.settings,
     }
-    setThreadModelParams(thread.id, modelParams)
+    setThreadModelParams(thread?.id, modelParams)
   }
 
   return { setActiveThread }
diff --git a/web/hooks/useUpdateModelParameters.ts b/web/hooks/useUpdateModelParameters.ts
index 694394cee..99663229b 100644
--- a/web/hooks/useUpdateModelParameters.ts
+++ b/web/hooks/useUpdateModelParameters.ts
@@ -37,7 +37,8 @@ export default function useUpdateModelParameters() {
 
   const updateModelParameter = async (
     threadId: string,
-    settings: UpdateModelParameter
+    settings: UpdateModelParameter,
+    overwrite: boolean = false
   ) => {
     const thread = threads.find((thread) => thread.id === threadId)
     if (!thread) {
@@ -66,8 +67,14 @@ export default function useUpdateModelParameters() {
     const runtimeParams = toRuntimeParams(updatedModelParams)
     const settingParams = toSettingParams(updatedModelParams)
 
-    assistant.model.parameters = runtimeParams
-    assistant.model.settings = settingParams
+    assistant.model.parameters = {
+      ...(overwrite ? {} : assistant.model.parameters),
+      ...runtimeParams,
+    }
+    assistant.model.settings = {
+      ...(overwrite ? {} : assistant.model.settings),
+      ...settingParams,
+    }
     if (selectedModel) {
       assistant.model.id = settings.modelId ?? selectedModel?.id
       assistant.model.engine = settings.engine ?? selectedModel?.engine
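updateModelParameter now takes an optional overwrite flag: by default the incoming params are layered over the thread's existing model parameters, while the DropdownListSidebar call earlier passes true so switching models replaces them wholesale. A minimal sketch of the two merge behaviours, lifted out of the React hook; the mergeParams helper and the example values are made up:

// Sketch only: mirrors the spread logic above, outside the hook.
type Params = Record<string, unknown>

function mergeParams(existing: Params, incoming: Params, overwrite = false): Params {
  // overwrite === true  -> keep only the incoming values (model switch)
  // overwrite === false -> layer the incoming values over what the thread already had
  return { ...(overwrite ? {} : existing), ...incoming }
}

mergeParams({ ctx_len: 4096, temperature: 0.7 }, { temperature: 0.2 })
// => { ctx_len: 4096, temperature: 0.2 }
mergeParams({ ctx_len: 4096, temperature: 0.7 }, { temperature: 0.2 }, true)
// => { temperature: 0.2 }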
diff --git a/web/screens/Chat/ChatInput/index.tsx b/web/screens/Chat/ChatInput/index.tsx
index d5334cab8..052720d6c 100644
--- a/web/screens/Chat/ChatInput/index.tsx
+++ b/web/screens/Chat/ChatInput/index.tsx
@@ -165,7 +165,8 @@ const ChatInput: React.FC = () => {
           if (
             fileUpload.length > 0 ||
             (activeThread?.assistants[0].tools &&
-              !activeThread?.assistants[0].tools[0]?.enabled)
+              !activeThread?.assistants[0].tools[0]?.enabled &&
+              !activeThread?.assistants[0].model.settings.vision_model)
           ) {
             e.stopPropagation()
           } else {
@@ -177,7 +178,8 @@ const ChatInput: React.FC = () => {
         {fileUpload.length > 0 ||
           (activeThread?.assistants[0].tools &&
-            !activeThread?.assistants[0].tools[0]?.enabled && (
+            !activeThread?.assistants[0].tools[0]?.enabled &&
+            !activeThread?.assistants[0].model.settings.vision_model && (
               {fileUpload.length !== 0 && (
@@ -206,15 +208,41 @@ const ChatInput: React.FC = () => {
            className="absolute bottom-10 right-0 w-36 cursor-pointer rounded-lg border border-border bg-background py-1 shadow"
          >
-
+
+            {
+              if (activeThread?.assistants[0].model.settings.vision_model) {
+                imageInputRef.current?.click()
+                setShowAttacmentMenus(false)
+              }
+            }}
+          >
+            Image
            {
-              fileInputRef.current?.click()
-              setShowAttacmentMenus(false)
+              if (
+                !activeThread?.assistants[0].model.settings.vision_model ||
+                activeThread?.assistants[0].model.settings.text_model !==
+                  false
+              ) {
+                fileInputRef.current?.click()
+                setShowAttacmentMenus(false)
+              }
            }}
          >
diff --git a/web/screens/Chat/SimpleTextMessage/RelativeImage.tsx b/web/screens/Chat/SimpleTextMessage/RelativeImage.tsx
new file mode 100644
index 000000000..fda4c0755
--- /dev/null
+++ b/web/screens/Chat/SimpleTextMessage/RelativeImage.tsx
@@ -0,0 +1,30 @@
+import { useEffect, useState } from 'react'
+
+import { getJanDataFolderPath } from '@janhq/core'
+
+export const RelativeImage = ({
+  id,
+  src,
+  onClick,
+}: {
+  id: string
+  src: string
+  onClick: () => void
+}) => {
+  const [path, setPath] = useState('')
+
+  useEffect(() => {
+    getJanDataFolderPath().then((dataFolderPath) => {
+      setPath(dataFolderPath)
+    })
+  }, [])
+  return (
+  )
+}
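RelativeImage's return body is not shown above; given its id, src, and onClick props and the Jan data folder path held in path, it presumably renders an image addressed relative to that folder. A hedged guess at that markup only; the element choice, attributes, and the file:// scheme are assumptions, not taken from the patch:

// Hypothetical rendering for RelativeImage; the real attributes may differ.
const renderRelativeImage = (
  id: string,
  src: string,
  path: string,
  onClick: () => void
) => <img alt={id} src={`file://${path}/${src}`} onClick={onClick} />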
diff --git a/web/screens/Chat/SimpleTextMessage/index.tsx b/web/screens/Chat/SimpleTextMessage/index.tsx
index e5e2364b4..8d72614ad 100644
--- a/web/screens/Chat/SimpleTextMessage/index.tsx
+++ b/web/screens/Chat/SimpleTextMessage/index.tsx
@@ -38,6 +38,8 @@ import EditChatInput from '../EditChatInput'
 import Icon from '../FileUploadPreview/Icon'
 import MessageToolbar from '../MessageToolbar'
 
+import { RelativeImage } from './RelativeImage'
+
 import {
   editMessageAtom,
   getCurrentChatMessagesAtom,
@@ -204,14 +206,16 @@ const SimpleTextMessage: React.FC = (props) => {
         <>
           {props.content[0]?.type === ContentType.Image && (
-
-              {props.content[0]?.text.name}
-              onViewFile(`${props.id}.png`)}
-            />
-
+
+
+
+                onViewFile(`${props.content[0]?.text.annotations[0]}`)
+              }
+            />
+
diff --git a/web/utils/base64.ts b/web/utils/base64.ts
--- a/web/utils/base64.ts
+++ b/web/utils/base64.ts
       => resolve(baseURL as string)
     }
   })
+
+export function compressImage(
+  base64Image: string,
+  size: number
+): Promise<string> {
+  // Create a canvas element
+  const canvas = document.createElement('canvas')
+  const ctx = canvas.getContext('2d')
+
+  // Create an image object
+  const image = new Image()
+
+  // Set the image source to the base64 string
+  image.src = base64Image
+
+  return new Promise((resolve) => {
+    // Wait for the image to load
+    image.onload = () => {
+      // Cap the width at `size` and scale the height to keep the aspect ratio
+      const width = Math.min(size, image.width)
+      const height = (image.height / image.width) * width
+
+      canvas.width = width
+      canvas.height = height
+
+      // Draw the image on the canvas
+      ctx?.drawImage(image, 0, 0, canvas.width, canvas.height)
+
+      // Convert the canvas to a JPEG data URL (quality 1, so the size reduction
+      // comes from the resize rather than from JPEG compression)
+      const compressedBase64Image = canvas.toDataURL(`image/jpeg`, 1)
+
+      // Resolve with the compressed base64 image
+      return resolve(compressedBase64Image)
+    }
+  })
+}
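For reference, the new helper is exercised by useSendChatMessage above: the uploaded file is read into a base64 data URL and, when it is an image, downscaled to at most 512px wide before being attached to the message. A small standalone sketch of that pipeline; the File value is assumed to come from a file input, and toImageAttachment is a made-up wrapper:

import { compressImage, getBase64 } from '@/utils/base64'

async function toImageAttachment(file: File): Promise<string> {
  // Read the file into a data URL, e.g. 'data:image/png;base64,...'
  const dataUrl = await getBase64(file)
  // Downscale to a maximum width of 512px, matching the call in useSendChatMessage
  return compressImage(dataUrl, 512)
}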