diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts index 3236994b2..41de30c1b 100644 --- a/core/src/browser/extensions/engines/AIEngine.ts +++ b/core/src/browser/extensions/engines/AIEngine.ts @@ -13,7 +13,7 @@ export interface chatCompletionRequestMessage { } export interface Content { - type: 'text' | 'input_image' | 'input_audio' + type: 'text' | 'image_url' | 'input_audio' text?: string image_url?: string input_audio?: InputAudio @@ -182,6 +182,7 @@ export interface SessionInfo { model_id: string //name of the model model_path: string // path of the loaded model api_key: string + mmproj_path?: string } export interface UnloadResult { diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts index c296e06af..7229552a2 100644 --- a/extensions/llamacpp-extension/src/index.ts +++ b/extensions/llamacpp-extension/src/index.ts @@ -21,6 +21,7 @@ import { events, AppEvent, DownloadEvent, + chatCompletionRequestMessage, } from '@janhq/core' import { error, info, warn } from '@tauri-apps/plugin-log' @@ -2296,7 +2297,9 @@ export default class llamacpp_extension extends AIEngine { : Math.floor(maxContextLength) const mmprojInfo = mmprojPath - ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(2)}MB, offloadMmproj=${offloadMmproj}` + ? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed( + 2 + )}MB, offloadMmproj=${offloadMmproj}` : '' logger.info( @@ -2489,8 +2492,151 @@ export default class llamacpp_extension extends AIEngine { logger.error('Failed to validate GGUF file:', error) return { isValid: false, - error: `Failed to read model metadata: ${error instanceof Error ? error.message : 'Unknown error'}`, + error: `Failed to read model metadata: ${ + error instanceof Error ? error.message : 'Unknown error' + }`, } } } + + async getTokensCount(opts: chatCompletionRequest): Promise { + const sessionInfo = await this.findSessionByModel(opts.model) + if (!sessionInfo) { + throw new Error(`No active session found for model: ${opts.model}`) + } + + // Check if the process is alive + const result = await invoke('plugin:llamacpp|is_process_running', { + pid: sessionInfo.pid, + }) + if (result) { + try { + await fetch(`http://localhost:${sessionInfo.port}/health`) + } catch (e) { + this.unload(sessionInfo.model_id) + throw new Error('Model appears to have crashed! Please reload!') + } + } else { + throw new Error('Model has crashed! 
Please reload!')
+    }
+
+    const baseUrl = `http://localhost:${sessionInfo.port}`
+    const headers = {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${sessionInfo.api_key}`,
+    }
+
+    // Count image tokens first
+    let imageTokens = 0
+    const hasImages = opts.messages.some(
+      (msg) =>
+        Array.isArray(msg.content) &&
+        msg.content.some((content) => content.type === 'image_url')
+    )
+
+    if (hasImages) {
+      logger.info('Conversation has images')
+      try {
+        // Read mmproj metadata to get vision parameters
+        logger.info(`MMPROJ PATH: ${sessionInfo.mmproj_path}`)
+
+        const metadata = await readGgufMetadata(sessionInfo.mmproj_path)
+        logger.info(`mmproj metadata: ${JSON.stringify(metadata.metadata)}`)
+        imageTokens = await this.calculateImageTokens(
+          opts.messages,
+          metadata.metadata
+        )
+      } catch (error) {
+        logger.warn('Failed to calculate image tokens:', error)
+        // Fallback to a rough estimate if metadata reading fails
+        imageTokens = this.estimateImageTokensFallback(opts.messages)
+      }
+    }
+
+    // Calculate text tokens
+    const messages = JSON.stringify({ messages: opts.messages })
+
+    let parseResponse = await fetch(`${baseUrl}/apply-template`, {
+      method: 'POST',
+      headers: headers,
+      body: messages,
+    })
+
+    if (!parseResponse.ok) {
+      const errorData = await parseResponse.json().catch(() => null)
+      throw new Error(
+        `API request failed with status ${
+          parseResponse.status
+        }: ${JSON.stringify(errorData)}`
+      )
+    }
+
+    const parsedPrompt = await parseResponse.json()
+
+    const response = await fetch(`${baseUrl}/tokenize`, {
+      method: 'POST',
+      headers: headers,
+      body: JSON.stringify({
+        content: parsedPrompt.prompt,
+      }),
+    })
+
+    if (!response.ok) {
+      const errorData = await response.json().catch(() => null)
+      throw new Error(
+        `API request failed with status ${response.status}: ${JSON.stringify(
+          errorData
+        )}`
+      )
+    }
+
+    const dataTokens = await response.json()
+    const textTokens = dataTokens.tokens?.length || 0
+
+    return textTokens + imageTokens
+  }
+
+  private async calculateImageTokens(
+    messages: chatCompletionRequestMessage[],
+    metadata: Record<string, unknown>
+  ): Promise<number> {
+    // Extract vision parameters from metadata
+    const projectionDim = Math.floor(Number(metadata['clip.vision.projection_dim']) / 10) || 256
+
+    // Count images in messages
+    let imageCount = 0
+    for (const message of messages) {
+      if (Array.isArray(message.content)) {
+        imageCount += message.content.filter(
+          (content) => content.type === 'image_url'
+        ).length
+      }
+    }
+
+    logger.info(
+      `Calculated ${projectionDim} tokens per image, ${imageCount} images total`
+    )
+    return projectionDim * imageCount - imageCount // remove the lingering <__image__> placeholder token
+  }
+
+  private estimateImageTokensFallback(
+    messages: chatCompletionRequestMessage[]
+  ): number {
+    // Fallback estimation if metadata reading fails
+    const estimatedTokensPerImage = 256 // Gemma's siglip
+
+    let imageCount = 0
+    for (const message of messages) {
+      if (Array.isArray(message.content)) {
+        imageCount += message.content.filter(
+          (content) => content.type === 'image_url'
+        ).length
+      }
+    }
+
+    logger.warn(
+      `Fallback estimation: ${estimatedTokensPerImage} tokens per image, ${imageCount} images total`
+    )
+    return imageCount * estimatedTokensPerImage - imageCount // remove the lingering <__image__> placeholder token
+  }
 }
diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs b/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs
index 79ec81f5a..96ecb36bc 100644
---
a/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs +++ b/src-tauri/plugins/tauri-plugin-llamacpp/src/commands.rs @@ -12,7 +12,7 @@ use tokio::time::Instant; use crate::device::{get_devices_from_backend, DeviceInfo}; use crate::error::{ErrorCode, LlamacppError, ServerError, ServerResult}; -use crate::path::{validate_binary_path, validate_model_path, validate_mmproj_path}; +use crate::path::{validate_binary_path, validate_mmproj_path, validate_model_path}; use crate::process::{ find_session_by_model_id, get_all_active_sessions, get_all_loaded_model_ids, get_random_available_port, is_process_running_by_pid, @@ -55,7 +55,20 @@ pub async fn load_llama_model( let port = parse_port_from_args(&args); let model_path_pb = validate_model_path(&mut args)?; - let _mmproj_path_pb = validate_mmproj_path(&mut args)?; + let mmproj_path_pb = validate_mmproj_path(&mut args)?; + + let mmproj_path_string = if let Some(ref _mmproj_pb) = mmproj_path_pb { + // Find the actual mmproj path from args after validation/conversion + if let Some(mmproj_index) = args.iter().position(|arg| arg == "--mmproj") { + Some(args[mmproj_index + 1].clone()) + } else { + None + } + } else { + None + }; + + log::info!("MMPROJ Path string: {}", &mmproj_path_string.as_ref().unwrap_or(&"None".to_string())); let api_key: String; @@ -211,6 +224,7 @@ pub async fn load_llama_model( model_id: model_id, model_path: model_path_pb.display().to_string(), api_key: api_key, + mmproj_path: mmproj_path_string, }; // Insert session info to process_map @@ -265,7 +279,7 @@ pub async fn unload_llama_model( pub async fn get_devices( backend_path: &str, library_path: Option<&str>, - envs: HashMap + envs: HashMap, ) -> ServerResult> { get_devices_from_backend(backend_path, library_path, envs).await } diff --git a/src-tauri/plugins/tauri-plugin-llamacpp/src/state.rs b/src-tauri/plugins/tauri-plugin-llamacpp/src/state.rs index 359a27951..2aad02ecf 100644 --- a/src-tauri/plugins/tauri-plugin-llamacpp/src/state.rs +++ b/src-tauri/plugins/tauri-plugin-llamacpp/src/state.rs @@ -11,6 +11,8 @@ pub struct SessionInfo { pub model_id: String, pub model_path: String, // path of the loaded model pub api_key: String, + #[serde(default)] + pub mmproj_path: Option, } pub struct LLamaBackendSession { diff --git a/web-app/src/components/TokenCounter.tsx b/web-app/src/components/TokenCounter.tsx new file mode 100644 index 000000000..0863176c7 --- /dev/null +++ b/web-app/src/components/TokenCounter.tsx @@ -0,0 +1,283 @@ +import { useMemo, useEffect, useState, useRef } from 'react' +import { cn } from '@/lib/utils' +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from '@/components/ui/tooltip' +import { useTokensCount } from '@/hooks/useTokensCount' +import { ThreadMessage } from '@janhq/core' + +interface TokenCounterProps { + messages?: ThreadMessage[] + className?: string + compact?: boolean + additionalTokens?: number // For vision tokens or other additions + uploadedFiles?: Array<{ + name: string + type: string + size: number + base64: string + dataUrl: string + }> +} + +export const TokenCounter = ({ + messages = [], + className, + compact = false, + additionalTokens = 0, + uploadedFiles = [], +}: TokenCounterProps) => { + const { calculateTokens, ...tokenData } = useTokensCount( + messages, + uploadedFiles + ) + + const [isAnimating, setIsAnimating] = useState(false) + const [prevTokenCount, setPrevTokenCount] = useState(0) + const [isUpdating, setIsUpdating] = useState(false) + const timersRef = useRef<{ update?: NodeJS.Timeout; 
anim?: NodeJS.Timeout }>( + {} + ) + + // Manual calculation - trigger on click + const handleCalculateTokens = () => { + calculateTokens() + } + + // Handle token count changes with proper debouncing and cleanup + useEffect(() => { + const currentTotal = tokenData.tokenCount + additionalTokens + const timers = timersRef.current + + // Clear any existing timers + if (timers.update) clearTimeout(timers.update) + if (timers.anim) clearTimeout(timers.anim) + + if (currentTotal !== prevTokenCount) { + setIsUpdating(true) + + // Clear updating state after a longer delay for smoother transitions + timers.update = setTimeout(() => { + setIsUpdating(false) + }, 250) + + // Only animate for significant changes and avoid animating on initial load + if (prevTokenCount > 0) { + const difference = Math.abs(currentTotal - prevTokenCount) + if (difference > 10) { + // Increased threshold to reduce micro-animations + setIsAnimating(true) + timers.anim = setTimeout(() => { + setIsAnimating(false) + }, 600) + } + } + + setPrevTokenCount(currentTotal) + } + + // Cleanup function + return () => { + if (timers.update) clearTimeout(timers.update) + if (timers.anim) clearTimeout(timers.anim) + } + }, [tokenData.tokenCount, additionalTokens, prevTokenCount]) + + const totalTokens = useMemo(() => { + return tokenData.tokenCount + additionalTokens + }, [tokenData.tokenCount, additionalTokens]) + + // Percentage calculation to match useTokensCount exactly + const adjustedPercentage = useMemo(() => { + if (!tokenData.maxTokens) return undefined + return (totalTokens / tokenData.maxTokens) * 100 + }, [totalTokens, tokenData.maxTokens]) + + // Check if percentage exceeds max (100%) + const isOverLimit = useMemo(() => { + return adjustedPercentage !== undefined && adjustedPercentage > 100 + }, [adjustedPercentage]) + + const formatNumber = (num: number) => { + if (num >= 1000000) return `${(num / 1000000).toFixed(1)}M` + if (num >= 1000) return `${(num / 1000).toFixed(1)}K` + return num.toString() + } + + if (compact) { + return ( + + + +
+ {/* Main compact display */} +
+ + {adjustedPercentage?.toFixed(1) || '0.0'}% + + +
+ + + + +
+
+
+
+ + {/* Detailed breakdown panel */} + <> + {/* Header with percentage and progress bar */} +
+
+ + {adjustedPercentage?.toFixed(1) || '0.0'}% + + + {formatNumber(totalTokens)} /{' '} + {formatNumber(tokenData.maxTokens || 0)} + +
+ + {/* Progress bar */} +
+
+
+
+ + {/* Token breakdown */} +
+
+ Text + + {formatNumber(Math.max(0, tokenData.tokenCount))} + +
+
+ + {/* Remaining tokens */} +
+
+ Remaining + + {formatNumber( + Math.max(0, (tokenData.maxTokens || 0) - totalTokens) + )} + +
+
+ + + + + ) + } + + // Non-compact: Simple inline display + return ( +
+
+ Context  + + {formatNumber(totalTokens)} + + {tokenData.maxTokens && ( + <> + / + + {formatNumber(tokenData.maxTokens)} + + + ({adjustedPercentage?.toFixed(1) || '0.0'}%) + + {isOverLimit && ( + +  {isOverLimit ? '⚠️ Over limit' : 'Tokens used'} + + )} + + )} +
+
+ ) +} diff --git a/web-app/src/components/ui/tooltip.tsx b/web-app/src/components/ui/tooltip.tsx index b7cae36a5..78e71a538 100644 --- a/web-app/src/components/ui/tooltip.tsx +++ b/web-app/src/components/ui/tooltip.tsx @@ -35,9 +35,12 @@ function TooltipTrigger({ function TooltipContent({ className, sideOffset = 0, + showArrow = true, children, ...props -}: React.ComponentProps) { +}: React.ComponentProps & { + showArrow?: boolean +}) { return ( {children} - + {showArrow && ( + + )} ) diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index f82d17f52..0b34d0d3a 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -34,6 +34,9 @@ import { ModelLoader } from '@/containers/loaders/ModelLoader' import DropdownToolsAvailable from '@/containers/DropdownToolsAvailable' import { useServiceHub } from '@/hooks/useServiceHub' import { useTools } from '@/hooks/useTools' +import { TokenCounter } from '@/components/TokenCounter' +import { useMessages } from '@/hooks/useMessages' +import { useShallow } from 'zustand/react/shallow' type ChatInputProps = { className?: string @@ -56,9 +59,21 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => { const setPrompt = usePrompt((state) => state.setPrompt) const currentThreadId = useThreads((state) => state.currentThreadId) const { t } = useTranslation() - const { spellCheckChatInput } = useGeneralSetting() + const spellCheckChatInput = useGeneralSetting( + (state) => state.spellCheckChatInput + ) + const tokenCounterCompact = useGeneralSetting( + (state) => state.tokenCounterCompact + ) useTools() + // Get current thread messages for token counting + const threadMessages = useMessages( + useShallow((state) => + currentThreadId ? state.messages[currentThreadId] : [] + ) + ) + const maxRows = 10 const selectedModel = useModelProvider((state) => state.selectedModel) @@ -79,6 +94,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => { const [connectedServers, setConnectedServers] = useState([]) const [isDragOver, setIsDragOver] = useState(false) const [hasMmproj, setHasMmproj] = useState(false) + const [hasActiveModels, setHasActiveModels] = useState(false) // Check for connected MCP servers useEffect(() => { @@ -100,6 +116,28 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => { return () => clearInterval(intervalId) }, [serviceHub]) + // Check for active models + useEffect(() => { + const checkActiveModels = async () => { + try { + const activeModels = await serviceHub + .models() + .getActiveModels('llamacpp') + setHasActiveModels(activeModels.length > 0) + } catch (error) { + console.error('Failed to get active models:', error) + setHasActiveModels(false) + } + } + + checkActiveModels() + + // Poll for active models every 3 seconds + const intervalId = setInterval(checkActiveModels, 3000) + + return () => clearInterval(intervalId) + }, [serviceHub]) + // Check for mmproj existence or vision capability when model changes useEffect(() => { const checkMmprojSupport = async () => { @@ -742,35 +780,51 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
- {streamingContent ? ( - - ) : ( - - )} + + {streamingContent ? ( + + ) : ( + + )} + @@ -792,6 +846,20 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => { )} + + {selectedProvider === 'llamacpp' && + hasActiveModels && + !tokenCounterCompact && + !initialMessage && + (threadMessages?.length > 0 || prompt.trim().length > 0) && ( +
+ +
+ )} ) } diff --git a/web-app/src/containers/TokenCounterCompactSwitcher.tsx b/web-app/src/containers/TokenCounterCompactSwitcher.tsx new file mode 100644 index 000000000..3270941cd --- /dev/null +++ b/web-app/src/containers/TokenCounterCompactSwitcher.tsx @@ -0,0 +1,17 @@ +import { useGeneralSetting } from '@/hooks/useGeneralSetting' +import { Switch } from '@/components/ui/switch' + +export function TokenCounterCompactSwitcher() { + const { tokenCounterCompact, setTokenCounterCompact } = useGeneralSetting() + + const toggleTokenCounterCompact = () => { + setTokenCounterCompact(!tokenCounterCompact) + } + + return ( + + ) +} diff --git a/web-app/src/hooks/useGeneralSetting.ts b/web-app/src/hooks/useGeneralSetting.ts index b356ca8a3..e76c49017 100644 --- a/web-app/src/hooks/useGeneralSetting.ts +++ b/web-app/src/hooks/useGeneralSetting.ts @@ -6,9 +6,11 @@ import { ExtensionManager } from '@/lib/extension' type LeftPanelStoreState = { currentLanguage: Language spellCheckChatInput: boolean + tokenCounterCompact: boolean huggingfaceToken?: string setHuggingfaceToken: (token: string) => void setSpellCheckChatInput: (value: boolean) => void + setTokenCounterCompact: (value: boolean) => void setCurrentLanguage: (value: Language) => void } @@ -17,8 +19,10 @@ export const useGeneralSetting = create()( (set) => ({ currentLanguage: 'en', spellCheckChatInput: true, + tokenCounterCompact: true, huggingfaceToken: undefined, setSpellCheckChatInput: (value) => set({ spellCheckChatInput: value }), + setTokenCounterCompact: (value) => set({ tokenCounterCompact: value }), setCurrentLanguage: (value) => set({ currentLanguage: value }), setHuggingfaceToken: (token) => { set({ huggingfaceToken: token }) diff --git a/web-app/src/hooks/useTokensCount.ts b/web-app/src/hooks/useTokensCount.ts new file mode 100644 index 000000000..90f740a4a --- /dev/null +++ b/web-app/src/hooks/useTokensCount.ts @@ -0,0 +1,200 @@ +import { useCallback, useState, useRef, useEffect, useMemo } from 'react' +import { ThreadMessage, ContentType } from '@janhq/core' +import { useServiceHub } from './useServiceHub' +import { useModelProvider } from './useModelProvider' +import { usePrompt } from './usePrompt' + +export interface TokenCountData { + tokenCount: number + maxTokens?: number + percentage?: number + isNearLimit: boolean + loading: boolean + error?: string +} + +export const useTokensCount = ( + messages: ThreadMessage[] = [], + uploadedFiles?: Array<{ + name: string + type: string + size: number + base64: string + dataUrl: string + }> +) => { + const [tokenData, setTokenData] = useState({ + tokenCount: 0, + loading: false, + isNearLimit: false, + }) + + const debounceTimeoutRef = useRef(undefined) + const isIncreasingContextSize = useRef(false) + const serviceHub = useServiceHub() + const { selectedModel, selectedProvider } = useModelProvider() + const { prompt } = usePrompt() + + // Create messages with current prompt for live calculation + const messagesWithPrompt = useMemo(() => { + const result = [...messages] + if (prompt.trim() || (uploadedFiles && uploadedFiles.length > 0)) { + const content = [] + + // Add text content if prompt exists + if (prompt.trim()) { + content.push({ type: ContentType.Text, text: { value: prompt } }) + } + + // Add image content for uploaded files + if (uploadedFiles && uploadedFiles.length > 0) { + uploadedFiles.forEach((file) => { + content.push({ + type: ContentType.Image, + image_url: { + url: file.dataUrl, + detail: 'high', // Default to high detail for token calculation + }, + }) + }) + } 
+ + if (content.length > 0) { + result.push({ + id: 'temp-prompt', + thread_id: '', + role: 'user', + content, + created_at: Date.now(), + } as ThreadMessage) + } + } + return result + }, [messages, prompt, uploadedFiles]) + + // Debounced calculation that includes current prompt + const debouncedCalculateTokens = useCallback(async () => { + const modelId = selectedModel?.id + if (!modelId || selectedProvider !== 'llamacpp') { + setTokenData({ + tokenCount: 0, + loading: false, + isNearLimit: false, + }) + return + } + + // Use messages with current prompt for calculation + const messagesToCalculate = messagesWithPrompt + if (messagesToCalculate.length === 0) { + setTokenData({ + tokenCount: 0, + loading: false, + isNearLimit: false, + }) + return + } + + setTokenData((prev) => ({ ...prev, loading: true, error: undefined })) + + try { + const tokenCount = await serviceHub + .models() + .getTokensCount(modelId, messagesToCalculate) + + const maxTokensValue = + selectedModel?.settings?.ctx_len?.controller_props?.value + const maxTokensNum = + typeof maxTokensValue === 'string' + ? parseInt(maxTokensValue) + : typeof maxTokensValue === 'number' + ? maxTokensValue + : undefined + + const percentage = maxTokensNum + ? (tokenCount / maxTokensNum) * 100 + : undefined + const isNearLimit = percentage ? percentage > 85 : false + + setTokenData({ + tokenCount, + maxTokens: maxTokensNum, + percentage, + isNearLimit, + loading: false, + }) + } catch (error) { + console.error('Failed to calculate tokens:', error) + setTokenData((prev) => ({ + ...prev, + loading: false, + error: + error instanceof Error ? error.message : 'Failed to calculate tokens', + })) + } + }, [ + selectedModel?.id, + selectedProvider, + messagesWithPrompt, + serviceHub, + selectedModel?.settings?.ctx_len?.controller_props?.value, + ]) + + // Debounced effect that triggers when prompt or messages change + useEffect(() => { + // Clear existing timeout + if (debounceTimeoutRef.current) { + clearTimeout(debounceTimeoutRef.current) + } + + // Skip calculation if we're currently increasing context size + if (isIncreasingContextSize.current) { + return + } + + // Only calculate if we have messages or a prompt + if ( + messagesWithPrompt.length > 0 && + selectedProvider === 'llamacpp' && + selectedModel?.id + ) { + debounceTimeoutRef.current = setTimeout(() => { + debouncedCalculateTokens() + }, 150) // 150ms debounce for more responsive updates + } else { + // Reset immediately if no content + setTokenData({ + tokenCount: 0, + loading: false, + isNearLimit: false, + }) + } + + return () => { + if (debounceTimeoutRef.current) { + clearTimeout(debounceTimeoutRef.current) + } + } + }, [ + prompt, + messages.length, + selectedModel?.id, + selectedProvider, + messagesWithPrompt.length, + debouncedCalculateTokens, + ]) + + // Manual calculation function (for click events) + const calculateTokens = useCallback(async () => { + // Trigger the debounced calculation immediately + if (debounceTimeoutRef.current) { + clearTimeout(debounceTimeoutRef.current) + } + await debouncedCalculateTokens() + }, [debouncedCalculateTokens]) + + return { + ...tokenData, + calculateTokens, + } +} diff --git a/web-app/src/locales/de-DE/settings.json b/web-app/src/locales/de-DE/settings.json index 94c6c82a7..ec1429353 100644 --- a/web-app/src/locales/de-DE/settings.json +++ b/web-app/src/locales/de-DE/settings.json @@ -100,6 +100,8 @@ "resetAppearanceSuccessDesc": "Alle Darstellungseinstellungen wurden auf die Standardeinstellungen zurückgesetzt.", "chatWidth": "Chat 
Breite", "chatWidthDesc": "Passe die Breite der Chatansicht an.", + "tokenCounterCompact": "Kompakter Token-Zähler", + "tokenCounterCompactDesc": "Token-Zähler im Chat-Eingabefeld anzeigen. Wenn deaktiviert, wird der Token-Zähler unter dem Eingabefeld angezeigt.", "codeBlockTitle": "Code Block", "codeBlockDesc": "Wähle einen Stil zur Syntaxhervorhebung.", "showLineNumbers": "Zeilennummern anzeigen", diff --git a/web-app/src/locales/en/settings.json b/web-app/src/locales/en/settings.json index 44a56d9e0..bea43d2de 100644 --- a/web-app/src/locales/en/settings.json +++ b/web-app/src/locales/en/settings.json @@ -100,6 +100,8 @@ "resetAppearanceSuccessDesc": "All appearance settings have been restored to default.", "chatWidth": "Chat Width", "chatWidthDesc": "Customize the width of the chat view.", + "tokenCounterCompact": "Compact Token Counter", + "tokenCounterCompactDesc": "Show token counter inside chat input. When disabled, token counter appears below the input.", "codeBlockTitle": "Code Block", "codeBlockDesc": "Choose a syntax highlighting style.", "showLineNumbers": "Show Line Numbers", diff --git a/web-app/src/locales/vn/settings.json b/web-app/src/locales/vn/settings.json index 618aa046b..c7a92e348 100644 --- a/web-app/src/locales/vn/settings.json +++ b/web-app/src/locales/vn/settings.json @@ -100,6 +100,8 @@ "resetAppearanceSuccessDesc": "Tất cả cài đặt giao diện đã được khôi phục về mặc định.", "chatWidth": "Chiều rộng trò chuyện", "chatWidthDesc": "Tùy chỉnh chiều rộng của chế độ xem trò chuyện.", + "tokenCounterCompact": "Bộ đếm token nhỏ gọn", + "tokenCounterCompactDesc": "Hiển thị bộ đếm token bên trong ô nhập trò chuyện. Khi tắt, bộ đếm token sẽ xuất hiện bên dưới ô nhập.", "codeBlockTitle": "Khối mã", "codeBlockDesc": "Chọn kiểu tô sáng cú pháp.", "showLineNumbers": "Hiển thị số dòng", diff --git a/web-app/src/locales/zh-CN/settings.json b/web-app/src/locales/zh-CN/settings.json index d2dead089..805901044 100644 --- a/web-app/src/locales/zh-CN/settings.json +++ b/web-app/src/locales/zh-CN/settings.json @@ -100,6 +100,8 @@ "resetAppearanceSuccessDesc": "所有外观设置已恢复为默认值。", "chatWidth": "聊天宽度", "chatWidthDesc": "自定义聊天视图的宽度。", + "tokenCounterCompact": "紧凑令牌计数器", + "tokenCounterCompactDesc": "在聊天输入框内显示令牌计数器。禁用时,令牌计数器显示在输入框下方。", "codeBlockTitle": "代码块", "codeBlockDesc": "选择语法高亮样式。", "showLineNumbers": "显示行号", @@ -264,4 +266,3 @@ "updateError": "更新 Llamacpp 失败" } } - diff --git a/web-app/src/routes/settings/appearance.tsx b/web-app/src/routes/settings/appearance.tsx index 3cba3eed5..118f82d07 100644 --- a/web-app/src/routes/settings/appearance.tsx +++ b/web-app/src/routes/settings/appearance.tsx @@ -19,6 +19,7 @@ import { LineNumbersSwitcher } from '@/containers/LineNumbersSwitcher' import { CodeBlockExample } from '@/containers/CodeBlockExample' import { toast } from 'sonner' import { ChatWidthSwitcher } from '@/containers/ChatWidthSwitcher' +import { TokenCounterCompactSwitcher } from '@/containers/TokenCounterCompactSwitcher' // eslint-disable-next-line @typescript-eslint/no-explicit-any export const Route = createFileRoute(route.settings.appearance as any)({ @@ -115,6 +116,11 @@ function Appareances() { description={t('settings:appearance.chatWidthDesc')} /> + } + /> {/* Codeblock */} diff --git a/web-app/src/services/models/default.ts b/web-app/src/services/models/default.ts index 65fb17a8e..5a31f3993 100644 --- a/web-app/src/services/models/default.ts +++ b/web-app/src/services/models/default.ts @@ -9,6 +9,8 @@ import { SessionInfo, SettingComponentProps, modelInfo, + 
ThreadMessage, + ContentType, } from '@janhq/core' import { Model as CoreModel } from '@janhq/core' import type { @@ -544,4 +546,113 @@ export class DefaultModelsService implements ModelsService { } } } + + async getTokensCount( + modelId: string, + messages: ThreadMessage[] + ): Promise { + try { + const engine = this.getEngine('llamacpp') as AIEngine & { + getTokensCount?: (opts: { + model: string + messages: Array<{ + role: string + content: + | string + | Array<{ + type: string + text?: string + image_url?: { + detail?: string + url?: string + } + }> + }> + }) => Promise + } + + if (engine && typeof engine.getTokensCount === 'function') { + // Transform Jan's ThreadMessage format to OpenAI chat completion format + const transformedMessages = messages + .map((message) => { + // Handle different content types + let content: + | string + | Array<{ + type: string + text?: string + image_url?: { + detail?: string + url?: string + } + }> = '' + + if (message.content && message.content.length > 0) { + // Check if there are any image_url content types + const hasImages = message.content.some( + (content) => content.type === ContentType.Image + ) + + if (hasImages) { + // For multimodal messages, preserve the array structure + content = message.content.map((contentItem) => { + if (contentItem.type === ContentType.Text) { + return { + type: 'text', + text: contentItem.text?.value || '', + } + } else if (contentItem.type === ContentType.Image) { + return { + type: 'image_url', + image_url: { + detail: contentItem.image_url?.detail, + url: contentItem.image_url?.url || '', + }, + } + } + // Fallback for unknown content types + return { + type: contentItem.type, + text: contentItem.text?.value, + image_url: contentItem.image_url, + } + }) + } else { + // For text-only messages, keep the string format + const textContents = message.content + .filter( + (content) => + content.type === ContentType.Text && content.text?.value + ) + .map((content) => content.text?.value || '') + + content = textContents.join(' ') + } + } + + return { + role: message.role, + content, + } + }) + .filter((msg) => + typeof msg.content === 'string' + ? msg.content.trim() !== '' + : Array.isArray(msg.content) && msg.content.length > 0 + ) // Filter out empty messages + + return await engine.getTokensCount({ + model: modelId, + messages: transformedMessages, + }) + } + + // Fallback if method is not available + console.warn('getTokensCount method not available in llamacpp engine') + return 0 + } catch (error) { + console.error(`Error getting tokens count for model ${modelId}:`, error) + return 0 + } + } } diff --git a/web-app/src/services/models/types.ts b/web-app/src/services/models/types.ts index b7724fef2..5bf66b8bf 100644 --- a/web-app/src/services/models/types.ts +++ b/web-app/src/services/models/types.ts @@ -2,7 +2,7 @@ * Models Service Types */ -import { SessionInfo, modelInfo } from '@janhq/core' +import { SessionInfo, modelInfo, ThreadMessage } from '@janhq/core' import { Model as CoreModel } from '@janhq/core' // Types for model catalog @@ -142,4 +142,5 @@ export interface ModelsService { mmprojPath?: string, requestedCtx?: number ): Promise + getTokensCount(modelId: string, messages: ThreadMessage[]): Promise }
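Reviewer sketch (not part of the diff): the estimate the extension now computes boils down to floor(clip.vision.projection_dim / 10) tokens per image (256 when the mmproj metadata cannot be read), minus one <__image__> placeholder token per image, plus the text tokens obtained by rendering the chat template through the server's /apply-template endpoint and tokenizing the result via /tokenize. A minimal TypeScript illustration, assuming a llama.cpp server already listening on `port` with `apiKey`; the standalone function, the type names, and the default projectionDim value are assumptions for illustration, not code from this PR.

// Illustrative sketch of the token estimate performed by getTokensCount/calculateImageTokens.
type ContentPart = {
  type: 'text' | 'image_url'
  text?: string
  image_url?: { url: string; detail?: string }
}
type Message = { role: string; content: string | ContentPart[] }

async function estimatePromptTokens(
  port: number,
  apiKey: string,
  messages: Message[],
  projectionDim = 2560 // e.g. clip.vision.projection_dim read from the mmproj GGUF
): Promise<number> {
  const headers = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${apiKey}`,
  }

  // Image tokens: floor(projection_dim / 10) per image, minus the <__image__> placeholder each image leaves behind.
  const imageCount = messages
    .flatMap((m) => (Array.isArray(m.content) ? m.content : []))
    .filter((part) => part.type === 'image_url').length
  const perImage = Math.floor(projectionDim / 10) || 256
  const imageTokens = perImage * imageCount - imageCount

  // Text tokens: render the chat template, then tokenize the rendered prompt.
  const templated = await fetch(`http://localhost:${port}/apply-template`, {
    method: 'POST',
    headers,
    body: JSON.stringify({ messages }),
  }).then((r) => r.json())

  const tokenized = await fetch(`http://localhost:${port}/tokenize`, {
    method: 'POST',
    headers,
    body: JSON.stringify({ content: templated.prompt }),
  }).then((r) => r.json())

  return (tokenized.tokens?.length ?? 0) + imageTokens
}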
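Reviewer sketch (not part of the diff): DefaultModelsService.getTokensCount collapses text-only ThreadMessages to plain strings and keeps multimodal ones as OpenAI-style content parts before handing them to the llamacpp engine. An illustrative request shape; the model id and the truncated data URL are placeholders.

// Illustrative shape of the transformed request forwarded to the engine.
const exampleRequest = {
  model: 'gemma-3-4b-it', // assumed model id, for illustration only
  messages: [
    // a text-only ThreadMessage collapses to a single string
    { role: 'user', content: 'Summarize the previous answer.' },
    // a multimodal ThreadMessage stays an array of typed parts
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        {
          type: 'image_url',
          image_url: { detail: 'high', url: 'data:image/png;base64,...' }, // placeholder data URL
        },
      ],
    },
  ],
}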