feat: Jan can see (#2069)

* feat: jan can see

feat: Add GPT-4 Vision model (Preview)

fix: Add visionModel as property in ModelInfo

fix: Fix condition to load local messages in useSetActiveThread hook

feat: Enable Image as input for chat

fix: Update model parameters in JSON files for remote GPT models

fix: Add thread as optional

fix: Add support for message as image

fix: Linter

fix: Update proxyModel to proxy_model and add textModel

chore: Change proxyModel to proxy_model

fix: Update settings with visionModel and textModel

fix: vision model passed through the retrieval tool

fix: linter

* fix: image could not be loaded and the request could not be sent

---------

Co-authored-by: Louis <louis@jan.ai>
This commit is contained in:
hiro 2024-03-05 08:33:09 +07:00 committed by GitHub
parent 1584f0d21e
commit e6c10202e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 238 additions and 57 deletions

View File

@ -7,7 +7,7 @@ export type ModelInfo = {
settings: ModelSettingParams
parameters: ModelRuntimeParams
engine?: InferenceEngine
proxyEngine?: InferenceEngine
proxy_model?: InferenceEngine
}
/**
@ -93,12 +93,7 @@ export type Model = {
*/
engine: InferenceEngine
proxyEngine?: InferenceEngine
/**
* Is multimodal or not.
*/
visionModel?: boolean
proxy_model?: InferenceEngine
}
export type ModelMetadata = {
@ -124,6 +119,8 @@ export type ModelSettingParams = {
llama_model_path?: string
mmproj?: string
cont_batching?: boolean
vision_model?: boolean
text_model?: boolean
}
/**

View File

@ -10,6 +10,7 @@ import {
executeOnMain,
AssistantExtension,
AssistantEvent,
ChatCompletionMessageContentType,
} from '@janhq/core'
export default class JanAssistantExtension extends AssistantExtension {
@ -86,7 +87,7 @@ export default class JanAssistantExtension extends AssistantExtension {
NODE,
'toolRetrievalIngestNewDocument',
docFile,
data.model?.proxyEngine
data.model?.proxy_model
)
}
} else if (
@ -105,7 +106,7 @@ export default class JanAssistantExtension extends AssistantExtension {
...data,
model: {
...data.model,
engine: data.model.proxyEngine,
engine: data.model.proxy_model,
},
}
events.emit(MessageEvent.OnMessageSent, output)
@ -168,7 +169,7 @@ export default class JanAssistantExtension extends AssistantExtension {
...data,
model: {
...data.model,
engine: data.model.proxyEngine,
engine: data.model.proxy_model,
},
}
events.emit(MessageEvent.OnMessageSent, output)

View File

@ -16,6 +16,8 @@
"description": "BakLlava 1 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"vision_model": true,
"text_model": false,
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",

View File

@ -11,7 +11,10 @@
"description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7
},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]

View File

@ -11,7 +11,10 @@
"description": "OpenAI GPT 3.5 Turbo model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7
},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]

View File

@ -0,0 +1,26 @@
{
"sources": [
{
"url": "https://openai.com"
}
],
"id": "gpt-4-vision-preview",
"object": "model",
"name": "OpenAI GPT 4 with Vision (Preview)",
"version": "1.0",
"description": "OpenAI GPT 4 with Vision model is extremely good in preview",
"format": "api",
"settings": {
"vision_model": true,
    "text_model": false
},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7
},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length", "Vision"]
},
"engine": "openai"
}

View File

@ -11,7 +11,10 @@
"description": "OpenAI GPT 4 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"parameters": {
"max_tokens": 4096,
"temperature": 0.7
},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]

View File

@ -12,10 +12,12 @@
"id": "llava-1.5-13b-q5",
"object": "model",
"name": "LlaVa 1.5 13B Q5 K",
"version": "1.0",
"version": "1.1",
"description": "LlaVa 1.5 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"vision_model": true,
"text_model": false,
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",

View File

@ -12,10 +12,12 @@
"id": "llava-1.5-7b-q5",
"object": "model",
"name": "LlaVa 1.5 7B Q5 K",
"version": "1.0",
"version": "1.1",
"description": "LlaVa 1.5 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"vision_model": true,
"text_model": false,
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",

View File

@ -139,11 +139,16 @@ const DropdownListSidebar = ({
// Update model parameter to the thread file
if (model)
updateModelParameter(activeThread.id, {
params: modelParams,
modelId: model.id,
engine: model.engine,
})
updateModelParameter(
activeThread.id,
{
params: modelParams,
modelId: model.id,
engine: model.engine,
},
// Overwrite the existing model parameter
true
)
}
},
[

View File

@ -1,4 +1,9 @@
import { openFileExplorer, joinPath, getJanDataFolderPath } from '@janhq/core'
import {
openFileExplorer,
joinPath,
getJanDataFolderPath,
baseName,
} from '@janhq/core'
import { useAtomValue } from 'jotai'
import { selectedModelAtom } from '@/containers/DropdownListSidebar'
@ -78,6 +83,8 @@ export const usePath = () => {
const userSpace = await getJanDataFolderPath()
let filePath = undefined
id = await baseName(id)
filePath = await joinPath(['threads', `${activeThread.id}/files`, `${id}`])
if (!filePath) return
const fullPath = await joinPath([userSpace, filePath])

View File

@ -30,7 +30,7 @@ import {
fileUploadAtom,
} from '@/containers/Providers/Jotai'
import { getBase64 } from '@/utils/base64'
import { compressImage, getBase64 } from '@/utils/base64'
import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'
import { loadModelErrorAtom, useActiveModel } from './useActiveModel'
@ -169,12 +169,22 @@ export default function useSendChatMessage() {
setCurrentPrompt('')
setEditPrompt('')
const base64Blob = fileUpload[0]
? await getBase64(fileUpload[0].file).then()
let base64Blob = fileUpload[0]
? await getBase64(fileUpload[0].file)
: undefined
const fileContentType = fileUpload[0]?.type
const msgId = ulid()
const isDocumentInput = base64Blob && fileContentType === 'pdf'
const isImageInput = base64Blob && fileContentType === 'image'
if (isImageInput && base64Blob) {
// Compress image
base64Blob = await compressImage(base64Blob, 512)
}
const messages: ChatCompletionMessage[] = [
activeThread.assistants[0]?.instructions,
]
@ -202,13 +212,23 @@ export default function useSendChatMessage() {
type: ChatCompletionMessageContentType.Text,
text: prompt,
},
{
type: ChatCompletionMessageContentType.Doc,
doc_url: {
url: `threads/${activeThread.id}/files/${msgId}.pdf`,
},
},
]
isDocumentInput
? {
type: ChatCompletionMessageContentType.Doc,
doc_url: {
url: `threads/${activeThread.id}/files/${msgId}.pdf`,
},
}
: null,
isImageInput
? {
type: ChatCompletionMessageContentType.Image,
image_url: {
url: base64Blob,
},
}
: null,
].filter((e) => e !== null)
: prompt,
} as ChatCompletionMessage,
])
@ -226,8 +246,13 @@ export default function useSendChatMessage() {
) {
modelRequest = {
...modelRequest,
engine: InferenceEngine.tool_retrieval_enabled,
proxyEngine: modelRequest.engine,
// Tool retrieval support document input only for now
...(isDocumentInput
? {
engine: InferenceEngine.tool_retrieval_enabled,
proxy_model: modelRequest.engine,
}
: {}),
}
}
const messageRequest: MessageRequest = {

View File

@ -21,17 +21,17 @@ export default function useSetActiveThread() {
const setActiveThread = async (thread: Thread) => {
// Load local messages only if there are no messages in the state
if (!readyMessageThreads[thread.id]) {
const messages = await getLocalThreadMessage(thread.id)
setThreadMessage(thread.id, messages)
if (!readyMessageThreads[thread?.id]) {
const messages = await getLocalThreadMessage(thread?.id)
setThreadMessage(thread?.id, messages)
}
setActiveThreadId(thread.id)
setActiveThreadId(thread?.id)
const modelParams: ModelParams = {
...thread.assistants[0]?.model?.parameters,
...thread.assistants[0]?.model?.settings,
...thread?.assistants[0]?.model?.parameters,
...thread?.assistants[0]?.model?.settings,
}
setThreadModelParams(thread.id, modelParams)
setThreadModelParams(thread?.id, modelParams)
}
return { setActiveThread }

View File

@ -37,7 +37,8 @@ export default function useUpdateModelParameters() {
const updateModelParameter = async (
threadId: string,
settings: UpdateModelParameter
settings: UpdateModelParameter,
overwrite: boolean = false
) => {
const thread = threads.find((thread) => thread.id === threadId)
if (!thread) {
@ -66,8 +67,14 @@ export default function useUpdateModelParameters() {
const runtimeParams = toRuntimeParams(updatedModelParams)
const settingParams = toSettingParams(updatedModelParams)
assistant.model.parameters = runtimeParams
assistant.model.settings = settingParams
assistant.model.parameters = {
...(overwrite ? {} : assistant.model.parameters),
...runtimeParams,
}
assistant.model.settings = {
...(overwrite ? {} : assistant.model.settings),
...settingParams,
}
if (selectedModel) {
assistant.model.id = settings.modelId ?? selectedModel?.id
assistant.model.engine = settings.engine ?? selectedModel?.engine

View File

@ -165,7 +165,8 @@ const ChatInput: React.FC = () => {
if (
fileUpload.length > 0 ||
(activeThread?.assistants[0].tools &&
!activeThread?.assistants[0].tools[0]?.enabled)
!activeThread?.assistants[0].tools[0]?.enabled &&
!activeThread?.assistants[0].model.settings.vision_model)
) {
e.stopPropagation()
} else {
@ -177,7 +178,8 @@ const ChatInput: React.FC = () => {
<TooltipPortal>
{fileUpload.length > 0 ||
(activeThread?.assistants[0].tools &&
!activeThread?.assistants[0].tools[0]?.enabled && (
!activeThread?.assistants[0].tools[0]?.enabled &&
!activeThread?.assistants[0].model.settings.vision_model && (
<TooltipContent side="top" className="max-w-[154px] px-3">
{fileUpload.length !== 0 && (
<span>
@ -206,15 +208,41 @@ const ChatInput: React.FC = () => {
className="absolute bottom-10 right-0 w-36 cursor-pointer rounded-lg border border-border bg-background py-1 shadow"
>
<ul>
<li className="flex w-full cursor-not-allowed items-center space-x-2 px-4 py-2 text-muted-foreground opacity-50 hover:bg-secondary">
<li
className={twMerge(
'flex w-full items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary',
activeThread?.assistants[0].model.settings.vision_model
? 'cursor-pointer'
: 'cursor-not-allowed opacity-50'
)}
onClick={() => {
if (activeThread?.assistants[0].model.settings.vision_model) {
imageInputRef.current?.click()
setShowAttacmentMenus(false)
}
}}
>
<ImageIcon size={16} />
<span className="font-medium">Image</span>
</li>
<li
className="flex w-full cursor-pointer items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary"
className={twMerge(
'flex w-full cursor-pointer items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary',
activeThread?.assistants[0].model.settings.vision_model &&
activeThread?.assistants[0].model.settings.text_model ===
false
? 'cursor-not-allowed opacity-50'
: 'cursor-pointer'
)}
onClick={() => {
fileInputRef.current?.click()
setShowAttacmentMenus(false)
if (
!activeThread?.assistants[0].model.settings.vision_model ||
activeThread?.assistants[0].model.settings.text_model !==
false
) {
fileInputRef.current?.click()
setShowAttacmentMenus(false)
}
}}
>
<FileTextIcon size={16} />

View File

@ -0,0 +1,30 @@
import { useEffect, useState } from 'react'
import { getJanDataFolderPath } from '@janhq/core'
type RelativeImageProps = {
  id: string
  src: string
  onClick: () => void
}

/**
 * Renders an image whose `src` may be stored relative to the Jan data folder.
 * A `src` containing 'files/' is resolved against the data folder through the
 * file:// scheme; any other value (e.g. a data URL) is rendered unchanged.
 */
export const RelativeImage = ({ id, src, onClick }: RelativeImageProps) => {
  const [dataFolder, setDataFolder] = useState<string>('')

  // Resolve the Jan data folder once on mount; the lookup is asynchronous.
  useEffect(() => {
    getJanDataFolderPath().then(setDataFolder)
  }, [])

  const resolvedSrc = src.includes('files/')
    ? `file://${dataFolder}/${src}`
    : src

  return (
    <button onClick={onClick}>
      <img
        className="aspect-auto h-[300px] cursor-pointer"
        alt={id}
        src={resolvedSrc}
      />
    </button>
  )
}

View File

@ -38,6 +38,8 @@ import EditChatInput from '../EditChatInput'
import Icon from '../FileUploadPreview/Icon'
import MessageToolbar from '../MessageToolbar'
import { RelativeImage } from './RelativeImage'
import {
editMessageAtom,
getCurrentChatMessagesAtom,
@ -204,14 +206,16 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
<div className={twMerge('w-full')}>
<>
{props.content[0]?.type === ContentType.Image && (
<div className="group/image relative mb-2 inline-flex overflow-hidden rounded-xl">
<img
className="aspect-auto h-[300px]"
alt={props.content[0]?.text.name}
src={props.content[0]?.text.annotations[0]}
onClick={() => onViewFile(`${props.id}.png`)}
/>
<div className="absolute left-0 top-0 z-20 hidden h-full w-full bg-black/20 group-hover/image:inline-block" />
<div className="group/image relative mb-2 inline-flex cursor-pointer overflow-hidden rounded-xl">
<div className="left-0 top-0 z-20 h-full w-full bg-black/20 group-hover/image:inline-block">
<RelativeImage
src={props.content[0]?.text.annotations[0]}
id={props.id}
onClick={() =>
onViewFile(`${props.content[0]?.text.annotations[0]}`)
}
/>
</div>
<Tooltip>
<TooltipTrigger asChild>
<div

View File

@ -7,3 +7,39 @@ export const getBase64 = async (file: File): Promise<string> =>
resolve(baseURL as string)
}
})
/**
 * Downscales a base64-encoded image so its width is at most `size` pixels
 * (aspect ratio preserved) and re-encodes it as a JPEG data URL.
 *
 * @param base64Image - The source image as a base64/data-URL string.
 * @param size - Maximum output width in pixels; smaller images are not upscaled.
 * @returns The resized image as a JPEG data URL. If the input cannot be
 *          decoded, the original `base64Image` is returned unchanged so the
 *          caller can still proceed.
 */
export function compressImage(
  base64Image: string,
  size: number
): Promise<string> {
  // Offscreen canvas used purely for resizing; it is never attached to the DOM.
  const canvas = document.createElement('canvas')
  const ctx = canvas.getContext('2d')

  const image = new Image()

  return new Promise((resolve) => {
    image.onload = () => {
      // Cap the width at `size`; height follows from the aspect ratio.
      const width = Math.min(size, image.width)
      const height = (image.height / image.width) * width
      canvas.width = width
      canvas.height = height

      // Draw the image scaled down onto the canvas.
      ctx?.drawImage(image, 0, 0, canvas.width, canvas.height)

      // Quality 1 keeps maximum JPEG quality; the size reduction comes from
      // the dimensional downscale above.
      resolve(canvas.toDataURL('image/jpeg', 1))
    }

    // Without an error handler the promise would hang forever on a bad or
    // undecodable image; fall back to the uncompressed input instead.
    image.onerror = () => resolve(base64Image)

    // Assign src only after both handlers are attached, so the load/error
    // event cannot fire before we are listening.
    image.src = base64Image
  })
}