diff --git a/extensions/llamacpp-extension/settings.json b/extensions/llamacpp-extension/settings.json index df4b442c4..0fbebc197 100644 --- a/extensions/llamacpp-extension/settings.json +++ b/extensions/llamacpp-extension/settings.json @@ -131,11 +131,11 @@ { "key": "device", "title": "Devices for Offload", - "description": "Comma-separated list of devices to use for offloading (e.g., 'cuda:0', 'cuda:0,cuda:1'). Leave empty to use default/CPU only.", + "description": "Comma-separated list of devices to use for offloading (e.g., 'CUDA0', 'CUDA0,CUDA1'). Leave empty to use default/CPU only.", "controllerType": "input", "controllerProps": { "value": "", - "placeholder": "cuda:0", + "placeholder": "CUDA0", "type": "text" } }, diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts index 673a80687..0a7bc8934 100644 --- a/extensions/llamacpp-extension/src/index.ts +++ b/extensions/llamacpp-extension/src/index.ts @@ -526,6 +526,9 @@ export default class llamacpp_extension extends AIEngine { const valueStr = value as string const [version, backend] = valueStr.split('/') + // Reset device setting when backend changes + this.config.device = '' + const closure = async () => { await this.ensureBackendReady(backend, version) } diff --git a/web-app/src/containers/dynamicControllerSetting/index.tsx b/web-app/src/containers/dynamicControllerSetting/index.tsx index 4c2115399..2f575654e 100644 --- a/web-app/src/containers/dynamicControllerSetting/index.tsx +++ b/web-app/src/containers/dynamicControllerSetting/index.tsx @@ -10,6 +10,7 @@ type DynamicControllerProps = { title?: string className?: string description?: string + readonly?: boolean controllerType: | 'input' | 'checkbox' diff --git a/web-app/src/hooks/__tests__/useHardware.test.ts b/web-app/src/hooks/__tests__/useHardware.test.ts index 41ba6665f..ee8b182bb 100644 --- a/web-app/src/hooks/__tests__/useHardware.test.ts +++ b/web-app/src/hooks/__tests__/useHardware.test.ts @@ -17,26 +17,6 @@ vi.mock('@/constants/localStorage', () => ({ }, })) -vi.mock('./useModelProvider', () => ({ - useModelProvider: { - getState: () => ({ - updateProvider: vi.fn(), - getProviderByName: vi.fn(() => ({ - settings: [ - { - key: 'version_backend', - controller_props: { value: 'cuda' }, - }, - { - key: 'device', - controller_props: { value: '' }, - }, - ], - })), - }), - }, -})) - // Mock zustand persist vi.mock('zustand/middleware', () => ({ persist: (fn: any) => fn, @@ -253,50 +233,6 @@ describe('useHardware', () => { expect(result.current.pollingPaused).toBe(false) }) - it('should get activated device string', () => { - const { result } = renderHook(() => useHardware()) - - const testHardwareData = { - cpu: { - arch: 'x86_64', - core_count: 8, - extensions: ['SSE', 'AVX'], - name: 'Intel Core i7', - usage: 25.5, - }, - gpus: [ - { - name: 'NVIDIA RTX 3080', - total_memory: 10737418240, - vendor: 'NVIDIA', - uuid: 'GPU-12345', - driver_version: '470.57.02', - activated: true, - nvidia_info: { - index: 0, - compute_capability: '8.6', - }, - vulkan_info: { - index: 0, - device_id: 8704, - device_type: 'discrete', - api_version: '1.2.0', - }, - }, - ], - os_type: 'linux', - os_name: 'Ubuntu', - total_memory: 17179869184, - } - - act(() => { - result.current.setHardwareData(testHardwareData) - }) - - const deviceString = result.current.getActivatedDeviceString() - expect(typeof deviceString).toBe('string') - }) - describe('setOS', () => { it('should update OS data', () => { const { result } = renderHook(() => useHardware()) @@ 
-331,202 +267,6 @@ describe('useHardware', () => { }) }) - describe('updateHardwareDataPreservingGpuOrder', () => { - it('should preserve existing GPU order and activation states', () => { - const { result } = renderHook(() => useHardware()) - - const initialData: HardwareData = { - cpu: { - arch: 'x86_64', - core_count: 4, - extensions: [], - name: 'CPU', - usage: 0, - }, - gpus: [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: true, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - { - name: 'GPU 2', - total_memory: 4096, - vendor: 'AMD', - uuid: 'gpu-2', - driver_version: '2.0', - activated: false, - nvidia_info: { index: 1, compute_capability: '7.0' }, - vulkan_info: { - index: 1, - device_id: 2, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ], - os_type: 'windows', - os_name: 'Windows 11', - total_memory: 16384, - } - - act(() => { - result.current.setHardwareData(initialData) - }) - - const updatedData: HardwareData = { - ...initialData, - gpus: [ - { ...initialData.gpus[1], name: 'GPU 2 Updated' }, - { ...initialData.gpus[0], name: 'GPU 1 Updated' }, - ], - } - - act(() => { - result.current.updateHardwareDataPreservingGpuOrder(updatedData) - }) - - expect(result.current.hardwareData.gpus[0].uuid).toBe('gpu-1') - expect(result.current.hardwareData.gpus[0].name).toBe('GPU 1 Updated') - expect(result.current.hardwareData.gpus[0].activated).toBe(true) - expect(result.current.hardwareData.gpus[1].uuid).toBe('gpu-2') - expect(result.current.hardwareData.gpus[1].name).toBe('GPU 2 Updated') - expect(result.current.hardwareData.gpus[1].activated).toBe(false) - }) - - it('should add new GPUs at the end', () => { - const { result } = renderHook(() => useHardware()) - - const initialData: HardwareData = { - cpu: { - arch: 'x86_64', - core_count: 4, - extensions: [], - name: 'CPU', - usage: 0, - }, - gpus: [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: true, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ], - os_type: 'windows', - os_name: 'Windows 11', - total_memory: 16384, - } - - act(() => { - result.current.setHardwareData(initialData) - }) - - const updatedData: HardwareData = { - ...initialData, - gpus: [ - ...initialData.gpus, - { - name: 'New GPU', - total_memory: 4096, - vendor: 'AMD', - uuid: 'gpu-new', - driver_version: '3.0', - nvidia_info: { index: 1, compute_capability: '7.0' }, - vulkan_info: { - index: 1, - device_id: 3, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ], - } - - act(() => { - result.current.updateHardwareDataPreservingGpuOrder(updatedData) - }) - - expect(result.current.hardwareData.gpus).toHaveLength(2) - expect(result.current.hardwareData.gpus[0].uuid).toBe('gpu-1') - expect(result.current.hardwareData.gpus[0].activated).toBe(true) - expect(result.current.hardwareData.gpus[1].uuid).toBe('gpu-new') - expect(result.current.hardwareData.gpus[1].activated).toBe(false) - }) - - it('should initialize all GPUs as inactive when no existing data', () => { - const { result } = renderHook(() => useHardware()) - - // First clear any existing data by setting empty hardware data - act(() => { - result.current.setHardwareData({ - cpu: { arch: '', core_count: 0, extensions: [], name: 
'', usage: 0 }, - gpus: [], - os_type: '', - os_name: '', - total_memory: 0, - }) - }) - - // Now we should have empty hardware state - expect(result.current.hardwareData.gpus.length).toBe(0) - - const hardwareData: HardwareData = { - cpu: { - arch: 'x86_64', - core_count: 4, - extensions: [], - name: 'CPU', - usage: 0, - }, - gpus: [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ], - os_type: 'windows', - os_name: 'Windows 11', - total_memory: 16384, - } - - act(() => { - result.current.updateHardwareDataPreservingGpuOrder(hardwareData) - }) - - expect(result.current.hardwareData.gpus[0].activated).toBe(false) - }) - }) - describe('updateGPU', () => { it('should update specific GPU at index', () => { const { result } = renderHook(() => useHardware()) @@ -621,485 +361,84 @@ describe('useHardware', () => { }) }) - describe('reorderGPUs', () => { - it('should reorder GPUs correctly', () => { + describe('setHardwareData with GPU activation', () => { + it('should initialize GPUs as inactive when activated is not specified', () => { const { result } = renderHook(() => useHardware()) - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: false, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, + const hardwareData: HardwareData = { + cpu: { + arch: 'x86_64', + core_count: 4, + extensions: [], + name: 'CPU', + usage: 0, }, - { - name: 'GPU 2', - total_memory: 4096, - vendor: 'AMD', - uuid: 'gpu-2', - driver_version: '2.0', - activated: false, - nvidia_info: { index: 1, compute_capability: '7.0' }, - vulkan_info: { - index: 1, - device_id: 2, - device_type: 'discrete', - api_version: '1.0', + gpus: [ + { + name: 'GPU 1', + total_memory: 8192, + vendor: 'NVIDIA', + uuid: 'gpu-1', + driver_version: '1.0', + nvidia_info: { index: 0, compute_capability: '8.0' }, + vulkan_info: { + index: 0, + device_id: 1, + device_type: 'discrete', + api_version: '1.0', + }, }, - }, - { - name: 'GPU 3', - total_memory: 6144, - vendor: 'Intel', - uuid: 'gpu-3', - driver_version: '3.0', - activated: false, - nvidia_info: { index: 2, compute_capability: '6.0' }, - vulkan_info: { - index: 2, - device_id: 3, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - act(() => { - result.current.reorderGPUs(0, 2) - }) - - expect(result.current.hardwareData.gpus[0].uuid).toBe('gpu-2') - expect(result.current.hardwareData.gpus[1].uuid).toBe('gpu-3') - expect(result.current.hardwareData.gpus[2].uuid).toBe('gpu-1') - }) - - it('should handle invalid indices gracefully', () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: false, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - const originalOrder = result.current.hardwareData.gpus - - act(() => { - result.current.reorderGPUs(-1, 0) - }) - - 
expect(result.current.hardwareData.gpus).toEqual(originalOrder) - - act(() => { - result.current.reorderGPUs(0, 5) - }) - - expect(result.current.hardwareData.gpus).toEqual(originalOrder) - }) - }) - - describe('getActivatedDeviceString', () => { - it('should return empty string when no GPUs are activated', () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: false, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - const deviceString = result.current.getActivatedDeviceString() - expect(deviceString).toBe('') - }) - - it('should return CUDA device string for NVIDIA GPUs', () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: true, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - const deviceString = result.current.getActivatedDeviceString('cuda') - expect(deviceString).toBe('cuda:0') - }) - - it('should return Vulkan device string for Vulkan backend', () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'AMD', - uuid: 'gpu-1', - driver_version: '1.0', - activated: true, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 1, - device_id: 2, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - const deviceString = result.current.getActivatedDeviceString('vulkan') - expect(deviceString).toBe('Vulkan1') - }) - - it('should handle mixed backend correctly', () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'NVIDIA GPU', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: true, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - { - name: 'AMD GPU', - total_memory: 4096, - vendor: 'AMD', - uuid: 'gpu-2', - driver_version: '2.0', - activated: true, - // AMD GPU shouldn't have nvidia_info, just vulkan_info - nvidia_info: { index: 1, compute_capability: '7.0' }, - vulkan_info: { - index: 1, - device_id: 2, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - // Based on the implementation, both GPUs will use CUDA since they both have nvidia_info - // The test should match the actual behavior - const deviceString = - result.current.getActivatedDeviceString('cuda+vulkan') - expect(deviceString).toBe('cuda:0,cuda:1') - }) - - it('should return multiple device strings comma-separated', () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: true, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - 
api_version: '1.0', - }, - }, - { - name: 'GPU 2', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-2', - driver_version: '1.0', - activated: true, - nvidia_info: { index: 1, compute_capability: '8.0' }, - vulkan_info: { - index: 1, - device_id: 2, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - const deviceString = result.current.getActivatedDeviceString('cuda') - expect(deviceString).toBe('cuda:0,cuda:1') - }) - }) - - describe('updateGPUActivationFromDeviceString', () => { - it('should activate GPUs based on device string', () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: false, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - { - name: 'GPU 2', - total_memory: 4096, - vendor: 'AMD', - uuid: 'gpu-2', - driver_version: '2.0', - activated: false, - nvidia_info: { index: 1, compute_capability: '7.0' }, - vulkan_info: { - index: 1, - device_id: 2, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - act(() => { - result.current.updateGPUActivationFromDeviceString('cuda:0,Vulkan1') - }) - - expect(result.current.hardwareData.gpus[0].activated).toBe(true) - expect(result.current.hardwareData.gpus[1].activated).toBe(true) - }) - - it('should handle empty device string', () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: true, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - act(() => { - result.current.updateGPUActivationFromDeviceString('') - }) - - expect(result.current.hardwareData.gpus[0].activated).toBe(false) - }) - - it('should handle invalid device string format', () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: false, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - act(() => { - result.current.updateGPUActivationFromDeviceString('invalid:format,bad') - }) - - expect(result.current.hardwareData.gpus[0].activated).toBe(false) - }) - }) - - describe('toggleGPUActivation', () => { - it('should toggle GPU activation and manage loading state', async () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: false, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - expect(result.current.hardwareData.gpus[0].activated).toBe(false) - expect(result.current.pollingPaused).toBe(false) - - await act(async () => { - await 
result.current.toggleGPUActivation(0) - }) - - expect(result.current.hardwareData.gpus[0].activated).toBe(true) - }) - - it('should handle invalid GPU index gracefully', async () => { - const { result } = renderHook(() => useHardware()) - - const gpus: GPU[] = [ - { - name: 'GPU 1', - total_memory: 8192, - vendor: 'NVIDIA', - uuid: 'gpu-1', - driver_version: '1.0', - activated: false, - nvidia_info: { index: 0, compute_capability: '8.0' }, - vulkan_info: { - index: 0, - device_id: 1, - device_type: 'discrete', - api_version: '1.0', - }, - }, - ] - - act(() => { - result.current.setGPUs(gpus) - }) - - const originalState = result.current.hardwareData.gpus[0].activated - - // Test with invalid index that doesn't throw an error - try { - await act(async () => { - await result.current.toggleGPUActivation(5) - }) - - expect(result.current.hardwareData.gpus[0].activated).toBe( - originalState - ) - } catch (error) { - // If it throws an error due to index bounds, that's expected behavior - expect(result.current.hardwareData.gpus[0].activated).toBe( - originalState - ) + ], + os_type: 'windows', + os_name: 'Windows 11', + total_memory: 16384, } + + act(() => { + result.current.setHardwareData(hardwareData) + }) + + expect(result.current.hardwareData.gpus[0].activated).toBe(false) + }) + + it('should preserve existing activation states when set', () => { + const { result } = renderHook(() => useHardware()) + + const hardwareData: HardwareData = { + cpu: { + arch: 'x86_64', + core_count: 4, + extensions: [], + name: 'CPU', + usage: 0, + }, + gpus: [ + { + name: 'GPU 1', + total_memory: 8192, + vendor: 'NVIDIA', + uuid: 'gpu-1', + driver_version: '1.0', + activated: true, + nvidia_info: { index: 0, compute_capability: '8.0' }, + vulkan_info: { + index: 0, + device_id: 1, + device_type: 'discrete', + api_version: '1.0', + }, + }, + ], + os_type: 'windows', + os_name: 'Windows 11', + total_memory: 16384, + } + + act(() => { + result.current.setHardwareData(hardwareData) + }) + + expect(result.current.hardwareData.gpus[0].activated).toBe(true) }) }) }) diff --git a/web-app/src/hooks/__tests__/useLlamacppDevices.test.ts b/web-app/src/hooks/__tests__/useLlamacppDevices.test.ts new file mode 100644 index 000000000..15a0afb35 --- /dev/null +++ b/web-app/src/hooks/__tests__/useLlamacppDevices.test.ts @@ -0,0 +1,132 @@ +import { renderHook, act } from '@testing-library/react' +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { useLlamacppDevices } from '../useLlamacppDevices' +import { getLlamacppDevices } from '../../services/hardware' + +// Mock the hardware service +vi.mock('@/services/hardware', () => ({ + getLlamacppDevices: vi.fn(), +})) + +// Mock the window.core object +Object.defineProperty(window, 'core', { + value: { + extensionManager: { + getByName: vi.fn(), + }, + }, + writable: true, +}) + +describe('useLlamacppDevices', () => { + const mockGetLlamacppDevices = vi.mocked(getLlamacppDevices) + + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should initialize with default state', () => { + const { result } = renderHook(() => useLlamacppDevices()) + + expect(result.current.devices).toEqual([]) + expect(result.current.loading).toBe(false) + expect(result.current.error).toBeNull() + expect(result.current.activatedDevices).toEqual(new Set()) + expect(typeof result.current.fetchDevices).toBe('function') + expect(typeof result.current.clearError).toBe('function') + expect(typeof result.current.setDevices).toBe('function') + expect(typeof 
result.current.toggleDevice).toBe('function') + expect(typeof result.current.setActivatedDevices).toBe('function') + }) + + it('should fetch devices successfully', async () => { + const mockDevices = [ + { id: 'CUDA0', name: 'NVIDIA GeForce RTX 4090', mem: 24576, free: 20480 }, + { id: 'CUDA1', name: 'NVIDIA GeForce RTX 3080', mem: 10240, free: 8192 }, + ] + + mockGetLlamacppDevices.mockResolvedValue(mockDevices) + + const { result } = renderHook(() => useLlamacppDevices()) + + await act(async () => { + await result.current.fetchDevices() + }) + + expect(result.current.devices).toEqual(mockDevices) + expect(result.current.loading).toBe(false) + expect(result.current.error).toBeNull() + expect(mockGetLlamacppDevices).toHaveBeenCalledOnce() + }) + + it('should clear error', () => { + const { result } = renderHook(() => useLlamacppDevices()) + + // Set an error first + act(() => { + result.current.setDevices([]) + }) + + // Clear the error + act(() => { + result.current.clearError() + }) + + expect(result.current.error).toBeNull() + }) + + it('should set devices directly', () => { + const mockDevices = [ + { id: 'CUDA0', name: 'NVIDIA GeForce RTX 4090', mem: 24576, free: 20480 }, + ] + + const { result } = renderHook(() => useLlamacppDevices()) + + act(() => { + result.current.setDevices(mockDevices) + }) + + expect(result.current.devices).toEqual(mockDevices) + }) + + it('should toggle device activation', () => { + const { result } = renderHook(() => useLlamacppDevices()) + + // Initially no devices are activated + expect(result.current.activatedDevices).toEqual(new Set()) + + // Toggle a device on + act(() => { + result.current.toggleDevice('CUDA0') + }) + + expect(result.current.activatedDevices).toEqual(new Set(['CUDA0'])) + + // Toggle the same device off + act(() => { + result.current.toggleDevice('CUDA0') + }) + + expect(result.current.activatedDevices).toEqual(new Set()) + + // Toggle multiple devices + act(() => { + result.current.toggleDevice('CUDA0') + result.current.toggleDevice('CUDA1') + }) + + expect(result.current.activatedDevices).toEqual(new Set(['CUDA0', 'CUDA1'])) + }) + + it('should set activated devices', () => { + const { result } = renderHook(() => useLlamacppDevices()) + + const deviceIds = ['CUDA0', 'CUDA1', 'Vulkan0'] + + act(() => { + result.current.setActivatedDevices(deviceIds) + }) + + expect(result.current.activatedDevices).toEqual(new Set(deviceIds)) + }) +}) \ No newline at end of file diff --git a/web-app/src/hooks/useHardware.ts b/web-app/src/hooks/useHardware.ts index a4e44cef9..013417f51 100644 --- a/web-app/src/hooks/useHardware.ts +++ b/web-app/src/hooks/useHardware.ts @@ -102,18 +102,12 @@ interface HardwareStore { // Update entire hardware data at once setHardwareData: (data: HardwareData) => void - // Update hardware data while preserving GPU order - updateHardwareDataPreservingGpuOrder: (data: HardwareData) => void - // Update individual GPU updateGPU: (index: number, gpu: GPU) => void // Update RAM available updateSystemUsage: (usage: SystemUsage) => void - // Toggle GPU activation (async, with loading) - toggleGPUActivation: (index: number) => Promise - // GPU loading state gpuLoading: { [index: number]: boolean } setGpuLoading: (index: number, loading: boolean) => void @@ -122,20 +116,11 @@ interface HardwareStore { pollingPaused: boolean pausePolling: () => void resumePolling: () => void - - // Reorder GPUs - reorderGPUs: (oldIndex: number, newIndex: number) => void - - // Get activated GPU device string - getActivatedDeviceString: 
(backendType?: string) => string - - // Update GPU activation states from device string - updateGPUActivationFromDeviceString: (deviceString: string) => void } export const useHardware = create()( persist( - (set, get) => ({ + (set) => ({ hardwareData: defaultHardwareData, systemUsage: defaultSystemUsage, gpuLoading: {}, @@ -193,58 +178,6 @@ export const useHardware = create()( }, }), - updateHardwareDataPreservingGpuOrder: (data) => - set((state) => { - // If we have existing GPU data, preserve the order and activation state - if (state.hardwareData.gpus.length > 0) { - // Reorder fresh GPU data to match existing order, adding new GPUs at the end - const reorderedGpus: GPU[] = [] - const processedUuids = new Set() - - // First, add existing GPUs in their current order, preserving activation state - state.hardwareData.gpus.forEach((existingGpu) => { - const freshGpu = data.gpus.find( - (gpu) => gpu.uuid === existingGpu.uuid - ) - if (freshGpu) { - reorderedGpus.push({ - ...freshGpu, - activated: existingGpu.activated ?? false, - }) - processedUuids.add(freshGpu.uuid) - } - }) - - // Then, add any new GPUs that weren't in the existing order (default to inactive) - data.gpus.forEach((freshGpu) => { - if (!processedUuids.has(freshGpu.uuid)) { - reorderedGpus.push({ - ...freshGpu, - activated: false, - }) - } - }) - - return { - hardwareData: { - ...data, - gpus: reorderedGpus, - }, - } - } else { - // No existing GPU data, initialize all GPUs as inactive - return { - hardwareData: { - ...data, - gpus: data.gpus.map((gpu) => ({ - ...gpu, - activated: false, - })), - }, - } - } - }), - updateGPU: (index, gpu) => set((state) => { const newGPUs = [...state.hardwareData.gpus] @@ -263,190 +196,6 @@ export const useHardware = create()( set(() => ({ systemUsage, })), - - toggleGPUActivation: async (index) => { - const { pausePolling, resumePolling, setGpuLoading } = get() - pausePolling() - setGpuLoading(index, true) - - try { - await new Promise((resolve) => setTimeout(resolve, 200)) // Simulate async operation - - set((state) => { - const newGPUs = [...state.hardwareData.gpus] - if (index >= 0 && index < newGPUs.length) { - newGPUs[index] = { - ...newGPUs[index], - activated: !newGPUs[index].activated, - } - } - - return { - hardwareData: { - ...state.hardwareData, - gpus: newGPUs, - }, - } - }) - - // Update the device setting after state change - const updatedState = get() - - // Import and get backend type - const { useModelProvider } = await import('./useModelProvider') - const { updateProvider, getProviderByName } = - useModelProvider.getState() - - const llamacppProvider = getProviderByName('llamacpp') - const backendType = llamacppProvider?.settings.find( - (s) => s.key === 'version_backend' - )?.controller_props.value as string - - const deviceString = - updatedState.getActivatedDeviceString(backendType) - - if (llamacppProvider) { - const updatedSettings = llamacppProvider.settings.map((setting) => { - if (setting.key === 'device') { - return { - ...setting, - controller_props: { - ...setting.controller_props, - value: deviceString, - }, - } - } - return setting - }) - - updateProvider('llamacpp', { - settings: updatedSettings, - }) - } - } finally { - setGpuLoading(index, false) - setTimeout(resumePolling, 1000) // Resume polling after 1s - } - }, - - reorderGPUs: (oldIndex, newIndex) => - set((state) => { - const newGPUs = [...state.hardwareData.gpus] - // Move the GPU from oldIndex to newIndex - if ( - oldIndex >= 0 && - oldIndex < newGPUs.length && - newIndex >= 0 && - newIndex < 
newGPUs.length - ) { - const [removed] = newGPUs.splice(oldIndex, 1) - newGPUs.splice(newIndex, 0, removed) - } - return { - hardwareData: { - ...state.hardwareData, - gpus: newGPUs, - }, - } - }), - - getActivatedDeviceString: (backendType?: string) => { - const { hardwareData } = get() - - // Get activated GPUs and generate appropriate device format based on backend - const activatedDevices = hardwareData.gpus - .filter((gpu) => gpu.activated) - .map((gpu) => { - const isCudaBackend = backendType?.includes('cuda') - const isVulkanBackend = backendType?.includes('vulkan') - - // Handle different backend scenarios - if (isCudaBackend && isVulkanBackend) { - // Mixed backend - prefer CUDA for NVIDIA GPUs, Vulkan for others - if (gpu.nvidia_info) { - return `cuda:${gpu.nvidia_info.index}` - } else if (gpu.vulkan_info) { - return `Vulkan${gpu.vulkan_info.index}` - } - } else if (isCudaBackend && gpu.nvidia_info) { - // CUDA backend - only use CUDA-compatible GPUs - return `cuda:${gpu.nvidia_info.index}` - } else if (isVulkanBackend && gpu.vulkan_info) { - // Vulkan backend - only use Vulkan-compatible GPUs - return `Vulkan${gpu.vulkan_info.index}` - } else if (!backendType) { - // No backend specified, use GPU's preferred type - if (gpu.nvidia_info) { - return `cuda:${gpu.nvidia_info.index}` - } else if (gpu.vulkan_info) { - return `Vulkan${gpu.vulkan_info.index}` - } - } - return null - }) - .filter((device) => device !== null) as string[] - - const deviceString = activatedDevices.join(',') - return deviceString - }, - - updateGPUActivationFromDeviceString: (deviceString: string) => { - set((state) => { - const newGPUs = [...state.hardwareData.gpus] - - // Parse device string to get active device indices - const activeDevices = deviceString - .split(',') - .map((device) => device.trim()) - .filter((device) => device.length > 0) - .map((device) => { - // Handle both formats: "cuda:0" and "Vulkan1" - const cudaMatch = device.match(/^cuda:(\d+)$/) - const vulkanMatch = device.match(/^Vulkan(\d+)$/) - - if (cudaMatch) { - return { - type: 'cuda' as const, - index: parseInt(cudaMatch[1]), - } - } else if (vulkanMatch) { - return { - type: 'vulkan' as const, - index: parseInt(vulkanMatch[1]), - } - } - return null - }) - .filter((device) => device !== null) as Array<{ - type: 'cuda' | 'vulkan' - index: number - }> - - // Update GPU activation states - newGPUs.forEach((gpu, gpuIndex) => { - const shouldBeActive = activeDevices.some((device) => { - if (device.type === 'cuda' && gpu.nvidia_info) { - return gpu.nvidia_info.index === device.index - } else if (device.type === 'vulkan' && gpu.vulkan_info) { - return gpu.vulkan_info.index === device.index - } - return false - }) - - newGPUs[gpuIndex] = { - ...gpu, - activated: shouldBeActive, - } - }) - - return { - hardwareData: { - ...state.hardwareData, - gpus: newGPUs, - }, - } - }) - }, }), { name: localStorageKey.settingHardware, diff --git a/web-app/src/hooks/useLlamacppDevices.ts b/web-app/src/hooks/useLlamacppDevices.ts new file mode 100644 index 000000000..38e33ee18 --- /dev/null +++ b/web-app/src/hooks/useLlamacppDevices.ts @@ -0,0 +1,84 @@ +import { create } from 'zustand' +import { getLlamacppDevices, DeviceList } from '@/services/hardware' +import { updateSettings } from '@/services/providers' +import { useModelProvider } from './useModelProvider' + +interface LlamacppDevicesStore { + devices: DeviceList[] + loading: boolean + error: string | null + activatedDevices: Set // Track which devices are activated + + // Actions + fetchDevices: () 
=> Promise<void>
+  clearError: () => void
+  setDevices: (devices: DeviceList[]) => void
+  toggleDevice: (deviceId: string) => void
+  setActivatedDevices: (deviceIds: string[]) => void
+}
+
+export const useLlamacppDevices = create<LlamacppDevicesStore>((set, get) => ({
+  devices: [],
+  loading: false,
+  error: null,
+  activatedDevices: new Set<string>(),
+
+  fetchDevices: async () => {
+    set({ loading: true, error: null })
+
+    try {
+      const devices = await getLlamacppDevices()
+      set({ devices, loading: false })
+    } catch (error) {
+      const errorMessage =
+        error instanceof Error ? error.message : 'Failed to fetch devices'
+      set({ error: errorMessage, loading: false })
+    }
+  },
+
+  clearError: () => set({ error: null }),
+
+  setDevices: (devices) => set({ devices }),
+
+  toggleDevice: async (deviceId: string) => {
+    set((state) => {
+      const newActivatedDevices = new Set(state.activatedDevices)
+      if (newActivatedDevices.has(deviceId)) {
+        newActivatedDevices.delete(deviceId)
+      } else {
+        newActivatedDevices.add(deviceId)
+      }
+      return { activatedDevices: newActivatedDevices }
+    })
+
+    // Update llamacpp provider settings
+    const { getProviderByName, updateProvider } = useModelProvider.getState()
+    const llamacppProvider = getProviderByName('llamacpp')
+
+    if (llamacppProvider) {
+      const deviceString = Array.from(get().activatedDevices).join(',')
+
+      const updatedSettings = llamacppProvider.settings.map((setting) => {
+        if (setting.key === 'device') {
+          return {
+            ...setting,
+            controller_props: {
+              ...setting.controller_props,
+              value: deviceString,
+            },
+          }
+        }
+        return setting
+      })
+
+      await updateSettings('llamacpp', updatedSettings)
+      updateProvider('llamacpp', {
+        settings: updatedSettings,
+      })
+    }
+  },
+
+  setActivatedDevices: (deviceIds: string[]) => {
+    set({ activatedDevices: new Set(deviceIds) })
+  },
+}))
diff --git a/web-app/src/routes/settings/hardware.tsx b/web-app/src/routes/settings/hardware.tsx
index 9c174a259..242d92139 100644
--- a/web-app/src/routes/settings/hardware.tsx
+++ b/web-app/src/routes/settings/hardware.tsx
@@ -7,258 +7,87 @@ import { Switch } from '@/components/ui/switch'
 import { Progress } from '@/components/ui/progress'
 import { useTranslation } from '@/i18n/react-i18next-compat'
 import { useHardware } from '@/hooks/useHardware'
-// import { useVulkan } from '@/hooks/useVulkan'
-import type { GPU, HardwareData } from '@/hooks/useHardware'
-import { useEffect, useState } from 'react'
-import {
-  DndContext,
-  closestCenter,
-  KeyboardSensor,
-  PointerSensor,
-  useSensor,
-  useSensors,
-  DragEndEvent,
-} from '@dnd-kit/core'
-import {
-  SortableContext,
-  verticalListSortingStrategy,
-  useSortable,
-} from '@dnd-kit/sortable'
-import { CSS } from '@dnd-kit/utilities'
-import {
-  IconGripVertical,
-  IconDeviceDesktopAnalytics,
-} from '@tabler/icons-react'
-import { getHardwareInfo, getSystemUsage } from '@/services/hardware'
+import { useLlamacppDevices } from '@/hooks/useLlamacppDevices'
+import { useEffect } from 'react'
+import { IconDeviceDesktopAnalytics } from '@tabler/icons-react'
+import { getSystemUsage } from '@/services/hardware'
 import { WebviewWindow } from '@tauri-apps/api/webviewWindow'
 import { formatMegaBytes } from '@/lib/utils'
 import { windowKey } from '@/constants/windows'
 import { toNumber } from '@/utils/number'
 import { useModelProvider } from '@/hooks/useModelProvider'
+import { stopAllModels } from '@/services/models'
 
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 export const Route = createFileRoute(route.settings.hardware as any)({
   component: Hardware,
}) -function SortableGPUItem({ gpu, index, isCompatible, isActivated }: { gpu: GPU; index: number; isCompatible: boolean; isActivated: boolean }) { - const { - attributes, - listeners, - setNodeRef, - transform, - transition, - isDragging, - } = useSortable({ id: index }) - const { t } = useTranslation() - - const { systemUsage, toggleGPUActivation, gpuLoading } = useHardware() - const usage = systemUsage.gpus[index] - - const style = { - transform: CSS.Transform.toString(transform), - transition, - opacity: isDragging ? 0.5 : 1, - position: 'relative' as const, - zIndex: isDragging ? 1 : 0, - } - - return ( -
- -
- -
- {gpu.name} - {!isCompatible && ( - - Incompatible with current backend - - )} -
- } - actions={ -
- toggleGPUActivation(index)} - /> -
- } - /> -
- - {formatMegaBytes(usage?.used_memory)}{' '} - {t('settings:hardware.freeOf')}{' '} - {formatMegaBytes(gpu.total_memory)} - - } - /> - - {gpu.driver_version?.slice(0, 50) || '-'} - - } - /> - - {gpu.nvidia_info?.compute_capability ?? - gpu.vulkan_info?.api_version} - - } - /> -
- - ) -} - function Hardware() { const { t } = useTranslation() const { hardwareData, systemUsage, setHardwareData, - updateHardwareDataPreservingGpuOrder, updateSystemUsage, - reorderGPUs, pollingPaused, } = useHardware() - // const { vulkanEnabled, setVulkanEnabled } = useVulkan() const { providers } = useModelProvider() const llamacpp = providers.find((p) => p.provider === 'llamacpp') - const versionBackend = llamacpp?.settings.find((s) => s.key === "version_backend")?.controller_props.value - // Determine backend type and filter GPUs accordingly - const isCudaBackend = typeof versionBackend === 'string' && versionBackend.includes('cuda') - const isVulkanBackend = typeof versionBackend === 'string' && versionBackend.includes('vulkan') - - // Filter and prepare GPUs based on backend - const getFilteredGPUs = () => { - // Always show all GPUs, but compatibility will be determined by isGPUActive - return hardwareData.gpus - } - - const filteredGPUs = getFilteredGPUs() - - // Check if GPU should be active based on backend compatibility - const isGPUCompatible = (gpu: GPU) => { - if (isCudaBackend) { - return gpu.nvidia_info !== null - } else if (isVulkanBackend) { - return gpu.vulkan_info !== null - } else { - // No valid backend - all GPUs are inactive - return false - } - } - - // Check if GPU is actually activated - const isGPUActive = (gpu: GPU) => { - return isGPUCompatible(gpu) && (gpu.activated ?? false) - } + // Llamacpp devices hook + const { + devices: llamacppDevices, + loading: llamacppDevicesLoading, + error: llamacppDevicesError, + activatedDevices, + toggleDevice, + fetchDevices, + } = useLlamacppDevices() + // Fetch llamacpp devices when component mounts useEffect(() => { - getHardwareInfo().then((freshData) => { - const data = freshData as unknown as HardwareData - updateHardwareDataPreservingGpuOrder(data) - }) - }, [updateHardwareDataPreservingGpuOrder]) + fetchDevices() + }, [fetchDevices]) - // Hardware and provider sync logic - const { getActivatedDeviceString, updateGPUActivationFromDeviceString } = useHardware() - const { updateProvider, getProviderByName } = useModelProvider() - const [isInitialized, setIsInitialized] = useState(false) + const { getProviderByName } = useModelProvider() - // Initialize GPU activations from device setting on first load + // Initialize llamacpp device activations from provider settings useEffect(() => { - if (hardwareData.gpus.length > 0 && !isInitialized) { + if (llamacppDevices.length > 0 && activatedDevices.size === 0) { const llamacppProvider = getProviderByName('llamacpp') - const currentDeviceSetting = llamacppProvider?.settings.find(s => s.key === 'device')?.controller_props.value as string - + const currentDeviceSetting = llamacppProvider?.settings.find( + (s) => s.key === 'device' + )?.controller_props.value as string + if (currentDeviceSetting) { - console.log(`Initializing GPU activations from device setting: "${currentDeviceSetting}"`) - updateGPUActivationFromDeviceString(currentDeviceSetting) - } - - setIsInitialized(true) - } - }, [hardwareData.gpus.length, isInitialized, getProviderByName, updateGPUActivationFromDeviceString]) + const deviceIds = currentDeviceSetting + .split(',') + .map((device) => device.trim()) + .filter((device) => device.length > 0) - // Sync device setting when GPU activations change (only after initialization) - const gpuActivationStates = hardwareData.gpus.map(gpu => gpu.activated) - - useEffect(() => { - if (isInitialized && hardwareData.gpus.length > 0) { - const llamacppProvider = 
getProviderByName('llamacpp') - const backendType = llamacppProvider?.settings.find(s => s.key === 'version_backend')?.controller_props.value as string - const deviceString = getActivatedDeviceString(backendType) - - if (llamacppProvider) { - const currentDeviceSetting = llamacppProvider.settings.find(s => s.key === 'device') - - // Sync device string when GPU activations change (only after initialization) - if (currentDeviceSetting && currentDeviceSetting.controller_props.value !== deviceString) { - console.log(`Syncing device string from "${currentDeviceSetting.controller_props.value}" to "${deviceString}"`) - - const updatedSettings = llamacppProvider.settings.map(setting => { - if (setting.key === 'device') { - return { - ...setting, - controller_props: { - ...setting.controller_props, - value: deviceString - } - } - } - return setting - }) - - updateProvider('llamacpp', { - settings: updatedSettings - }) + // Find matching devices by ID + const matchingDeviceIds = deviceIds.filter((deviceId) => + llamacppDevices.some((device) => device.id === deviceId) + ) + + if (matchingDeviceIds.length > 0) { + console.log( + `Initializing llamacpp device activations from device setting: "${currentDeviceSetting}"` + ) + // Update the activatedDevices in the hook + const { setActivatedDevices } = useLlamacppDevices.getState() + setActivatedDevices(matchingDeviceIds) } } } - }, [isInitialized, gpuActivationStates, versionBackend, getActivatedDeviceString, updateProvider, getProviderByName, hardwareData.gpus.length]) - - // Set up DnD sensors - const sensors = useSensors( - useSensor(PointerSensor), - useSensor(KeyboardSensor) - ) - - // Handle drag end event - const handleDragEnd = (event: DragEndEvent) => { - const { active, over } = event - - if (over && active.id !== over.id) { - // Find the actual indices in the original hardwareData.gpus array - const activeGpu = filteredGPUs[active.id as number] - const overGpu = filteredGPUs[over.id as number] - - const oldIndex = hardwareData.gpus.findIndex(gpu => gpu.uuid === activeGpu.uuid) - const newIndex = hardwareData.gpus.findIndex(gpu => gpu.uuid === overGpu.uuid) - - if (oldIndex !== -1 && newIndex !== -1) { - reorderGPUs(oldIndex, newIndex) - } - } - } + }, [ + llamacppDevices.length, + activatedDevices.size, + getProviderByName, + llamacppDevices, + ]) useEffect(() => { if (pollingPaused) return @@ -452,64 +281,64 @@ function Hardware() { /> - {/* Vulkan Settings */} - {/* {hardwareData.gpus.length > 0 && ( - - - { - setVulkanEnabled(checked) - setTimeout(() => { - window.location.reload() - }, 500) // Reload after 500ms to apply changes - }} - /> - - } - /> - - )} */} - - {/* GPU Information */} - {!IS_MACOS ? ( - - - - {hardwareData.gpus.length > 0 ? ( - - index)} - strategy={verticalListSortingStrategy} - > - {filteredGPUs.map((gpu, index) => ( - - ))} - - - ) : ( + {/* Llamacpp Devices Information */} + {!IS_MACOS && llamacpp && ( + + {llamacppDevicesLoading ? ( + } /> + ) : llamacppDevicesError ? ( } + title="Error loading devices" + actions={ + + {llamacppDevicesError} + + } /> + ) : llamacppDevices.length > 0 ? ( + llamacppDevices.map((device, index) => ( + + + {/*
+ + ID: {device.id} + + + Memory: {formatMegaBytes(device.mem)} /{' '} + {formatMegaBytes(device.free)} free + +
*/} + { + toggleDevice(device.id) + stopAllModels() + }} + /> + + } + /> +
+ + {formatMegaBytes(device.mem)}{' '} + {t('settings:hardware.freeOf')}{' '} + {formatMegaBytes(device.free)} + + } + /> +
+
+ )) + ) : ( + } /> )}
- ) : ( - <> )} diff --git a/web-app/src/routes/settings/providers/$providerName.tsx b/web-app/src/routes/settings/providers/$providerName.tsx index 2681054ac..4e2a21b16 100644 --- a/web-app/src/routes/settings/providers/$providerName.tsx +++ b/web-app/src/routes/settings/providers/$providerName.tsx @@ -2,7 +2,6 @@ import { Card, CardItem } from '@/containers/Card' import HeaderPage from '@/containers/HeaderPage' import SettingsMenu from '@/containers/SettingsMenu' import { useModelProvider } from '@/hooks/useModelProvider' -import { useHardware } from '@/hooks/useHardware' import { cn, getProviderTitle } from '@/lib/utils' import { open } from '@tauri-apps/plugin-dialog' import { @@ -39,6 +38,7 @@ import { toast } from 'sonner' import { useEffect, useState } from 'react' import { predefinedProviders } from '@/consts/providers' import { useModelLoad } from '@/hooks/useModelLoad' +import { useLlamacppDevices } from '@/hooks/useLlamacppDevices' // as route.threadsDetail export const Route = createFileRoute('/settings/providers/$providerName')({ @@ -80,7 +80,6 @@ function ProviderDetail() { const [refreshingModels, setRefreshingModels] = useState(false) const { providerName } = useParams({ from: Route.id }) const { getProviderByName, setProviders, updateProvider } = useModelProvider() - const { updateGPUActivationFromDeviceString } = useHardware() const provider = getProviderByName(providerName) const isSetup = step === 'setup_remote_provider' const navigate = useNavigate() @@ -256,7 +255,8 @@ function ProviderDetail() { controllerProps={setting.controller_props} className={cn( setting.key === 'api-key' && - 'third-step-setup-remote-provider' + 'third-step-setup-remote-provider', + setting.key === 'device' && 'hidden' )} onChange={(newValue) => { if (provider) { @@ -288,16 +288,28 @@ function ProviderDetail() { updateObj.base_url = newValue } - // Special handling for device setting changes - if ( - settingKey === 'device' && - typeof newValue === 'string' && - provider.provider === 'llamacpp' - ) { - console.log( - `Device setting manually changed to: "${newValue}"` - ) - updateGPUActivationFromDeviceString(newValue) + // Reset device setting to empty when backend version changes + if (settingKey === 'version_backend') { + const deviceSettingIndex = + newSettings.findIndex( + (s) => s.key === 'device' + ) + + if (deviceSettingIndex !== -1) { + ;( + newSettings[deviceSettingIndex] + .controller_props as { + value: string + } + ).value = '' + } + + // Reset llamacpp device activations when backend version changes + if (providerName === 'llamacpp') { + const { setActivatedDevices } = + useLlamacppDevices.getState() + setActivatedDevices([]) + } } updateSettings( diff --git a/web-app/src/routes/system-monitor.tsx b/web-app/src/routes/system-monitor.tsx index 1c7eb4410..46717f93d 100644 --- a/web-app/src/routes/system-monitor.tsx +++ b/web-app/src/routes/system-monitor.tsx @@ -2,16 +2,13 @@ import { createFileRoute } from '@tanstack/react-router' import { useEffect, useState } from 'react' import { useHardware } from '@/hooks/useHardware' -import { getHardwareInfo, getSystemUsage } from '@/services/hardware' import { Progress } from '@/components/ui/progress' -import type { HardwareData } from '@/hooks/useHardware' import { route } from '@/constants/routes' import { formatMegaBytes } from '@/lib/utils' import { IconDeviceDesktopAnalytics } from '@tabler/icons-react' -import { getActiveModels, stopModel } from '@/services/models' -import { Button } from '@/components/ui/button' import { 
useTranslation } from '@/i18n/react-i18next-compat' import { toNumber } from '@/utils/number' +import { useLlamacppDevices } from '@/hooks/useLlamacppDevices' import { useModelProvider } from '@/hooks/useModelProvider' export const Route = createFileRoute(route.systemMonitor as any)({ @@ -20,126 +17,66 @@ export const Route = createFileRoute(route.systemMonitor as any)({ function SystemMonitor() { const { t } = useTranslation() + const { hardwareData, systemUsage, updateSystemUsage } = useHardware() + const { - hardwareData, - systemUsage, - updateHardwareDataPreservingGpuOrder, - updateSystemUsage, - updateGPUActivationFromDeviceString, - } = useHardware() - const [activeModels, setActiveModels] = useState([]) - const { providers, getProviderByName } = useModelProvider() + devices: llamacppDevices, + activatedDevices, + fetchDevices, + setActivatedDevices, + } = useLlamacppDevices() + const { getProviderByName } = useModelProvider() + const [isInitialized, setIsInitialized] = useState(false) - // Determine backend type and filter GPUs accordingly (same logic as hardware.tsx) - const llamacpp = providers.find((p) => p.provider === 'llamacpp') - const versionBackend = llamacpp?.settings.find( - (s) => s.key === 'version_backend' - )?.controller_props.value - useEffect(() => { - // Initial data fetch - use updateHardwareDataPreservingGpuOrder like hardware.tsx - getHardwareInfo().then((data) => { - updateHardwareDataPreservingGpuOrder(data as unknown as HardwareData) - }) - getActiveModels().then((models) => setActiveModels(models || [])) + // Fetch llamacpp devices + fetchDevices() + }, [updateSystemUsage, fetchDevices]) - // Set up interval for real-time updates - const intervalId = setInterval(() => { - getSystemUsage().then((data) => { - updateSystemUsage(data) - }) - getActiveModels().then((models) => setActiveModels(models || [])) - }, 5000) - - return () => clearInterval(intervalId) - }, [updateHardwareDataPreservingGpuOrder, setActiveModels, updateSystemUsage]) - - // Initialize GPU activations from device setting on first load (same logic as hardware.tsx) + // Initialize when hardware data and llamacpp devices are available useEffect(() => { if (hardwareData.gpus.length > 0 && !isInitialized) { + setIsInitialized(true) + } + }, [hardwareData.gpus.length, isInitialized]) + + // Initialize llamacpp device activations from provider settings + useEffect(() => { + if (llamacppDevices.length > 0 && activatedDevices.size === 0) { const llamacppProvider = getProviderByName('llamacpp') const currentDeviceSetting = llamacppProvider?.settings.find( (s) => s.key === 'device' )?.controller_props.value as string if (currentDeviceSetting) { - updateGPUActivationFromDeviceString(currentDeviceSetting) - } + const deviceIds = currentDeviceSetting + .split(',') + .map((device) => device.trim()) + .filter((device) => device.length > 0) - setIsInitialized(true) - } - }, [ - hardwareData.gpus.length, - isInitialized, - getProviderByName, - updateGPUActivationFromDeviceString, - ]) - - // Sync device setting when GPU activations change (only after initialization) - same logic as hardware.tsx - const { getActivatedDeviceString } = useHardware() - const { updateProvider } = useModelProvider() - const gpuActivationStates = hardwareData.gpus.map((gpu) => gpu.activated) - - useEffect(() => { - if (isInitialized && hardwareData.gpus.length > 0) { - const llamacppProvider = getProviderByName('llamacpp') - const backendType = llamacppProvider?.settings.find( - (s) => s.key === 'version_backend' - 
)?.controller_props.value as string - const deviceString = getActivatedDeviceString(backendType) - - if (llamacppProvider) { - const currentDeviceSetting = llamacppProvider.settings.find( - (s) => s.key === 'device' + // Find matching devices by ID + const matchingDeviceIds = deviceIds.filter((deviceId) => + llamacppDevices.some((device) => device.id === deviceId) ) - // Sync device string when GPU activations change (only after initialization) - if ( - currentDeviceSetting && - currentDeviceSetting.controller_props.value !== deviceString - ) { - const updatedSettings = llamacppProvider.settings.map((setting) => { - if (setting.key === 'device') { - return { - ...setting, - controller_props: { - ...setting.controller_props, - value: deviceString, - }, - } - } - return setting - }) - - updateProvider('llamacpp', { - settings: updatedSettings, - }) + if (matchingDeviceIds.length > 0) { + console.log( + `Initializing llamacpp device activations from device setting: "${currentDeviceSetting}"` + ) + // Update the activatedDevices in the hook + setActivatedDevices(matchingDeviceIds) } } } }, [ - isInitialized, - gpuActivationStates, - versionBackend, - getActivatedDeviceString, - updateProvider, + llamacppDevices.length, + activatedDevices.size, getProviderByName, - hardwareData.gpus.length, + llamacppDevices, + setActivatedDevices, ]) - const stopRunningModel = (modelId: string) => { - stopModel(modelId) - .then(() => { - setActiveModels((prevModels) => - prevModels.filter((model) => model !== modelId) - ) - }) - .catch((error) => { - console.error('Error stopping model:', error) - }) - } - // Calculate RAM usage percentage const ramUsagePercentage = toNumber( @@ -147,35 +84,6 @@ function SystemMonitor() { hardwareData.total_memory ) * 100 - // Determine backend type and filter GPUs accordingly - const isCudaBackend = - typeof versionBackend === 'string' && versionBackend.includes('cuda') - const isVulkanBackend = - typeof versionBackend === 'string' && versionBackend.includes('vulkan') - - // Check if GPU should be active based on backend compatibility - const isGPUCompatible = (gpu: any) => { - if (isCudaBackend) { - return gpu.nvidia_info !== null - } else if (isVulkanBackend) { - return gpu.vulkan_info !== null - } else { - // No valid backend - all GPUs are inactive - return false - } - } - - // Check if GPU is actually activated - const isGPUActive = (gpu: any) => { - const compatible = isGPUCompatible(gpu) - const activated = gpu.activated ?? false - const result = compatible && activated - return result - } - - // Filter to show only active GPUs - const activeGPUs = hardwareData.gpus.filter((gpu) => isGPUActive(gpu)) - return (
@@ -185,7 +93,7 @@ function SystemMonitor() {
-
+
{/* CPU Usage Card */}

@@ -273,150 +181,51 @@ function SystemMonitor() {

-
- {/* Current Active Model Section */} -
-

- {t('system-monitor:runningModels')} -

- {activeModels.length === 0 && ( -
- {t('system-monitor:noRunningModels')} -
- )} - {activeModels.length > 0 && ( -
- {activeModels.map((model) => ( -
-
- - {model} - -
-
+ {/* GPU Usage Card */} +
+

+ {t('system-monitor:activeGpus')} +

+
+ {llamacppDevices.length > 0 ? ( + llamacppDevices.map((device) => ( +
- - {t('system-monitor:provider')} + {device.name} + + {activatedDevices.has(device.id) + ? t('system-monitor:active') + : 'Inactive'} - llama.cpp
-
- - {t('system-monitor:uptime')} - - {/* - {model.start_time && formatDuration(model.start_time)} - */} -
-
- - {t('system-monitor:actions')} - +
+ VRAM: - + {formatMegaBytes(device.mem)} + +
+
+ Free: + + {formatMegaBytes(device.free)}
+ )) + ) : ( +
+ {t('system-monitor:noGpus')}
- ))} + )}
- )} -
- - {/* Active GPUs Section */} -
-

- {t('system-monitor:activeGpus')} -

- {!isInitialized ? ( -
- Initializing GPU states... -
- ) : activeGPUs.length > 0 ? ( -
- {activeGPUs.map((gpu, index) => { - // Find the corresponding system usage data for this GPU - const gpuUsage = systemUsage.gpus.find( - (usage) => usage.uuid === gpu.uuid - ) - - return ( -
-
- - {gpu.name} - -
- {t('system-monitor:active')} -
-
-
-
- - {t('system-monitor:vramUsage')} - - - {gpuUsage ? ( - <> - {formatMegaBytes(gpuUsage.used_memory)} /{' '} - {formatMegaBytes(gpu.total_memory)} - - ) : ( - <> - {formatMegaBytes(0)} /{' '} - {formatMegaBytes(gpu.total_memory)} - - )} - -
-
- - {t('system-monitor:driverVersion')} - - - {gpu.driver_version || '-'} - -
-
- - {t('system-monitor:computeCapability')} - - - {gpu.nvidia_info?.compute_capability || - gpu.vulkan_info?.api_version || - '-'} - -
-
- -
-
-
- ) - })} -
- ) : ( -
- {t('system-monitor:noGpus')} -
- )} +
) diff --git a/web-app/src/services/hardware.ts b/web-app/src/services/hardware.ts index c0615e858..700db5485 100644 --- a/web-app/src/services/hardware.ts +++ b/web-app/src/services/hardware.ts @@ -1,6 +1,14 @@ import { HardwareData, SystemUsage } from '@/hooks/useHardware' import { invoke } from '@tauri-apps/api/core' +// Device list interface for llamacpp extension +export interface DeviceList { + id: string + name: string + mem: number + free: number +} + /** * Get hardware information from the HardwareManagementExtension. * @returns {Promise} A promise that resolves to the hardware information. @@ -17,6 +25,21 @@ export const getSystemUsage = async () => { return invoke('get_system_usage') as Promise } +/** + * Get devices from the llamacpp extension. + * @returns {Promise} A promise that resolves to the list of available devices. + */ +export const getLlamacppDevices = async (): Promise => { + const extensionManager = window.core.extensionManager + const llamacppExtension = extensionManager.getByName('@janhq/llamacpp-extension') + + if (!llamacppExtension) { + throw new Error('llamacpp extension not found') + } + + return llamacppExtension.getDevices() +} + /** * Set gpus activate * @returns A Promise that resolves set gpus activate.