diff --git a/extensions/engine-management-extension/src/utils.test.ts b/extensions/engine-management-extension/src/utils.test.ts
index f48c6ad44..e453f58cb 100644
--- a/extensions/engine-management-extension/src/utils.test.ts
+++ b/extensions/engine-management-extension/src/utils.test.ts
@@ -21,7 +21,7 @@ describe('engineVariant', () => {
   it('should return mac-amd64 when platform is darwin and arch is not arm64', async () => {
     vi.stubGlobal('PLATFORM', 'darwin')
     const result = await engineVariant({
-      cpu: { arch: 'x64', instructions: '' },
+      cpu: { arch: 'x64', instructions: [] },
       gpus: [],
       vulkan: false,
     })
@@ -31,7 +31,7 @@ describe('engineVariant', () => {
   it('should return windows-amd64-noavx-cuda-12-0 when platform is win32, cuda is enabled, and cuda version is 12', async () => {
     vi.stubGlobal('PLATFORM', 'win32')
     const result = await engineVariant({
-      cpu: { arch: 'x64', instructions: 'avx2' },
+      cpu: { arch: 'x64', instructions: ['avx2'] },
       gpus: [
         {
           activated: true,
@@ -47,7 +47,7 @@ describe('engineVariant', () => {
   it('should return linux-amd64-noavx-cuda-11-7 when platform is linux, cuda is enabled, and cuda version is 11', async () => {
     vi.stubGlobal('PLATFORM', 'linux')
     const result = await engineVariant({
-      cpu: { arch: 'x64', instructions: 'avx2' },
+      cpu: { arch: 'x64', instructions: [] },
       gpus: [
         {
           activated: true,
@@ -57,16 +57,34 @@ describe('engineVariant', () => {
       ],
       vulkan: false,
     })
-    expect(result).toBe('linux-amd64-avx2-cuda-11-7')
+    expect(result).toBe('linux-amd64-noavx-cuda-11-7')
   })
 
   it('should return windows-amd64-vulkan when platform is win32 and vulkan is enabled', async () => {
     vi.stubGlobal('PLATFORM', 'win32')
     const result = await engineVariant({
-      cpu: { arch: 'x64', instructions: '' },
+      cpu: { arch: 'x64', instructions: [] },
       gpus: [{ activated: true, version: '12' }],
       vulkan: true,
     })
     expect(result).toBe('windows-amd64-vulkan')
   })
+
+  it('should return windows-amd64-avx512 when platform is win32, no gpu detected and avx512 cpu instruction is supported', async () => {
+    vi.stubGlobal('PLATFORM', 'win32')
+    const result = await engineVariant({
+      cpu: { arch: 'x64', instructions: ['avx512'] },
+      gpus: [{ activated: true, version: '12' }],
+    })
+    expect(result).toBe('windows-amd64-avx512')
+  })
+
+  it('should return windows-amd64-noavx when platform is win32, no gpu detected and no accelerated cpu instructions are supported', async () => {
+    vi.stubGlobal('PLATFORM', 'win32')
+    const result = await engineVariant({
+      cpu: { arch: 'x64', instructions: [''] },
+      gpus: [{ activated: true, version: '12' }],
+    })
+    expect(result).toBe('windows-amd64-noavx')
+  })
 })
diff --git a/extensions/engine-management-extension/src/utils.ts b/extensions/engine-management-extension/src/utils.ts
index a80518d65..5e3f01ef7 100644
--- a/extensions/engine-management-extension/src/utils.ts
+++ b/extensions/engine-management-extension/src/utils.ts
@@ -1,20 +1,29 @@
 import { GpuSetting, log } from '@janhq/core'
 
+// Supported run modes
+enum RunMode {
+  Cuda = 'cuda',
+  CPU = 'cpu',
+}
+
+// Supported instruction sets
+const instructionBinaryNames = ['noavx', 'avx', 'avx2', 'avx512']
+
 /**
  * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
  * @param settings
  * @returns
  */
 
-const gpuRunMode = (settings?: GpuSetting): string => {
+const gpuRunMode = (settings?: GpuSetting): RunMode => {
   return settings.gpus?.some(
     (gpu) =>
-      gpu.activated === true &&
+      gpu.activated &&
       gpu.additional_information &&
       gpu.additional_information.driver_version
   )
-    ? 'cuda'
-    : ''
+    ? RunMode.Cuda
+    : RunMode.CPU
 }
 
 /**
@@ -37,13 +46,6 @@ const os = (settings?: GpuSetting): string => {
  * @returns
  */
 const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => {
-  const isUsingCuda =
-    settings?.vulkan !== true &&
-    settings?.gpus?.some((gpu) => (gpu.activated === true ? 'gpu' : 'cpu')) &&
-    !os().includes('mac')
-
-  if (!isUsingCuda) return undefined
-  // return settings?.cuda?.version === '11' ? '11-7' : '12-0'
   return settings.gpus?.some((gpu) => gpu.version.includes('12'))
     ? '12-0'
     : '11-7'
@@ -65,6 +67,7 @@ export const engineVariant = async (
   // There is no need to append the variant extension for mac
   if (platform.startsWith('mac')) return platform
 
+  const runMode = gpuRunMode(gpuSetting)
   // Only Nvidia GPUs have addition_information set and activated by default
   let engineVariant =
     !gpuSetting?.vulkan ||
@@ -72,14 +75,23 @@ export const engineVariant = async (
     gpuSetting.gpus.some((e) => e.additional_information && e.activated)
       ? [
           platform,
-          gpuRunMode(gpuSetting) === 'cuda' &&
-          (gpuSetting.cpu.instructions.includes('avx2') ||
-            gpuSetting.cpu.instructions.includes('avx512'))
-            ? 'avx2'
-            : 'noavx',
-          gpuRunMode(gpuSetting),
-          cudaVersion(gpuSetting),
-        ].filter(Boolean) // Remove any falsy values
+          ...(runMode === RunMode.Cuda
+            ? // For cuda we only need to check whether the cpu supports avx2 (otherwise fall back to noavx) - since other binaries are not shipped with the extension
+              [
+                gpuSetting.cpu?.instructions.includes('avx2') ||
+                gpuSetting.cpu?.instructions.includes('avx512')
+                  ? 'avx2'
+                  : 'noavx',
+                runMode,
+                cudaVersion(gpuSetting),
+              ]
+            : // For cpu only we need to check all available supported instructions
+              [
+                (gpuSetting.cpu?.instructions ?? ['noavx']).find((e) =>
+                  instructionBinaryNames.includes(e.toLowerCase())
+                ) ?? 'noavx',
+              ]),
+        ].filter(Boolean)
       : [platform, 'vulkan']
 
   let engineVariantString = engineVariant.join('-')
diff --git a/web/containers/ModelLabel/ModelLabel.test.tsx b/web/containers/ModelLabel/ModelLabel.test.tsx
index ca5cf19dc..545fc30d8 100644
--- a/web/containers/ModelLabel/ModelLabel.test.tsx
+++ b/web/containers/ModelLabel/ModelLabel.test.tsx
@@ -1,8 +1,7 @@
 import React from 'react'
-import { render, waitFor, screen } from '@testing-library/react'
+import { render } from '@testing-library/react'
 import { useAtomValue } from 'jotai'
 import { useActiveModel } from '@/hooks/useActiveModel'
-import { useSettings } from '@/hooks/useSettings'
 import ModelLabel from '@/containers/ModelLabel'
 
 jest.mock('jotai', () => ({
@@ -14,14 +13,9 @@ jest.mock('@/hooks/useActiveModel', () => ({
   useActiveModel: jest.fn(),
 }))
 
-jest.mock('@/hooks/useSettings', () => ({
-  useSettings: jest.fn(),
-}))
-
 describe('ModelLabel', () => {
   const mockUseAtomValue = useAtomValue as jest.Mock
   const mockUseActiveModel = useActiveModel as jest.Mock
-  const mockUseSettings = useSettings as jest.Mock
 
   const defaultProps: any = {
     metadata: {
@@ -44,7 +38,6 @@ describe('ModelLabel', () => {
     mockUseActiveModel.mockReturnValue({
       activeModel: { metadata: { size: 0 } },
     })
-    mockUseSettings.mockReturnValue({ settings: { run_mode: 'cpu' } })
 
     const props = {
       ...defaultProps,
diff --git a/web/containers/ModelLabel/index.tsx b/web/containers/ModelLabel/index.tsx
index c7c64b210..6c6c3cfda 100644
--- a/web/containers/ModelLabel/index.tsx
+++ b/web/containers/ModelLabel/index.tsx
@@ -4,8 +4,6 @@ import { useAtomValue } from 'jotai'
 
 import { useActiveModel } from '@/hooks/useActiveModel'
 
-import { useSettings } from '@/hooks/useSettings'
-
 import NotEnoughMemoryLabel from './NotEnoughMemoryLabel'
 
 import SlowOnYourDeviceLabel from './SlowOnYourDeviceLabel'
@@ -26,23 +24,23 @@ const ModelLabel = ({ size, compact }: Props) => {
   const totalRam = useAtomValue(totalRamAtom)
   const usedRam = useAtomValue(usedRamAtom)
   const availableVram = useAtomValue(availableVramAtom)
-  const { settings } = useSettings()
 
   const getLabel = (size: number) => {
     const minimumRamModel = (size * 1.25) / (1024 * 1024)
 
-    const availableRam = settings?.gpus?.some((gpu) => gpu.activated)
-      ? availableVram * 1000000 // MB to bytes
-      : totalRam -
-        (usedRam +
-          (activeModel?.metadata?.size
-            ? (activeModel.metadata.size * 1.25) / (1024 * 1024)
-            : 0))
+    const availableRam =
+      availableVram > 0
+        ? availableVram * 1000000 // MB to bytes
+        : totalRam -
+          (usedRam +
+            (activeModel?.metadata?.size
+              ? (activeModel.metadata.size * 1.25) / (1024 * 1024)
+              : 0))
 
     if (minimumRamModel > totalRam) {
       return (
         <NotEnoughMemoryLabel
-          unit={settings?.gpus?.some((gpu) => gpu.activated) ? 'VRAM' : 'RAM'}
+          unit={availableVram > 0 ? 'VRAM' : 'RAM'}
           compact={compact}
         />
       )
diff --git a/web/hooks/useSettings.ts b/web/hooks/useSettings.ts
deleted file mode 100644
index a9635aa93..000000000
--- a/web/hooks/useSettings.ts
+++ /dev/null
@@ -1,51 +0,0 @@
-import { useCallback, useEffect, useState } from 'react'
-
-import { fs, GpuSettingInfo, joinPath } from '@janhq/core'
-
-export type AppSettings = {
-  vulkan: boolean
-  gpus: GpuSettingInfo[]
-}
-
-export const useSettings = () => {
-  const [settings, setSettings] = useState<AppSettings>()
-
-  useEffect(() => {
-    readSettings().then((settings) => setSettings(settings as AppSettings))
-
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [])
-
-  const readSettings = useCallback(async () => {
-    if (!window?.core?.api) {
-      return
-    }
-    const settingsFile = await joinPath(['file://settings', 'settings.json'])
-    if (await fs.existsSync(settingsFile)) {
-      const settings = await fs.readFileSync(settingsFile, 'utf-8')
-      return typeof settings === 'object' ? settings : JSON.parse(settings)
-    }
-    return {}
-  }, [])
-
-  const saveSettings = async ({ vulkan }: { vulkan?: boolean | undefined }) => {
-    const settingsFile = await joinPath(['file://settings', 'settings.json'])
-    const settings = await readSettings()
-    if (vulkan != null) {
-      settings.vulkan = vulkan
-      // GPU enabled, set run_mode to 'gpu'
-      if (settings.vulkan === true) {
-        settings?.gpus?.some((gpu: { activated: boolean }) =>
-          gpu.activated === true ? 'gpu' : 'cpu'
-        )
-      }
-    }
-    await fs.writeFileSync(settingsFile, JSON.stringify(settings))
-  }
-
-  return {
-    readSettings,
-    saveSettings,
-    settings,
-  }
-}
diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx
index 7bfc81dbc..e02fc4fa8 100644
--- a/web/screens/Hub/ModelList/ModelHeader/index.tsx
+++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx
@@ -18,8 +18,6 @@ import { MainViewState } from '@/constants/screens'
 import { useCreateNewThread } from '@/hooks/useCreateNewThread'
 import useDownloadModel from '@/hooks/useDownloadModel'
 
-import { useSettings } from '@/hooks/useSettings'
-
 import { toGigabytes } from '@/utils/converter'
 
 import { getLogoEngine } from '@/utils/modelEngine'
@@ -53,16 +51,13 @@ const ModelItemHeader = ({ model, onSelectedModel }: Props) => {
   const setSelectedSetting = useSetAtom(selectedSettingAtom)
   const { requestCreateNewThread } = useCreateNewThread()
   const totalRam = useAtomValue(totalRamAtom)
-  const { settings } = useSettings()
 
   const nvidiaTotalVram = useAtomValue(nvidiaTotalVramAtom)
   const setMainViewState = useSetAtom(mainViewStateAtom)
 
   // Default nvidia returns vram in MB, need to convert to bytes to match the unit of totalRamW
-  let ram = nvidiaTotalVram * 1024 * 1024
-  if (ram === 0 || settings?.gpus?.some((gpu) => gpu.activated !== true)) {
-    ram = totalRam
-  }
+  const ram = nvidiaTotalVram > 0 ? nvidiaTotalVram * 1024 * 1024 : totalRam
+
   const serverEnabled = useAtomValue(serverEnabledAtom)
   const assistants = useAtomValue(assistantsAtom)
 
diff --git a/web/screens/Settings/Advanced/index.test.tsx b/web/screens/Settings/Advanced/index.test.tsx
index b43b57beb..2a762e224 100644
--- a/web/screens/Settings/Advanced/index.test.tsx
+++ b/web/screens/Settings/Advanced/index.test.tsx
@@ -22,25 +22,6 @@ global.window.core = {
   },
 }
 
-const setSettingsMock = jest.fn()
-
-// Mock useSettings hook
-jest.mock('@/hooks/useSettings', () => ({
-  __esModule: true,
-  useSettings: () => ({
-    readSettings: () => ({
-      run_mode: 'gpu',
-      experimental: false,
-      proxy: false,
-      gpus: [{ name: 'gpu-1' }, { name: 'gpu-2' }],
-      gpus_in_use: ['0'],
-      quick_ask: false,
-    }),
-    setSettings: setSettingsMock,
-  }),
-}))
-
-import * as toast from '@/containers/Toast'
 
 jest.mock('@/containers/Toast')
 
diff --git a/web/screens/Settings/Privacy/index.test.tsx b/web/screens/Settings/Privacy/index.test.tsx
index 66fa5d855..dad07d118 100644
--- a/web/screens/Settings/Privacy/index.test.tsx
+++ b/web/screens/Settings/Privacy/index.test.tsx
@@ -22,23 +22,6 @@ global.window.core = {
   },
 }
 
-const setSettingsMock = jest.fn()
-
-// Mock useSettings hook
-jest.mock('@/hooks/useSettings', () => ({
-  __esModule: true,
-  useSettings: () => ({
-    readSettings: () => ({
-      run_mode: 'gpu',
-      experimental: false,
-      proxy: false,
-      gpus: [{ name: 'gpu-1' }, { name: 'gpu-2' }],
-      gpus_in_use: ['0'],
-      quick_ask: false,
-    }),
-    setSettings: setSettingsMock,
-  }),
-}))
 
 import * as toast from '@/containers/Toast'