This reverts commit ad6fbea22df6deaba31e146dddb456e4a5d5dd75 Revert "chore: add engine logo from local instead of metadata logo (#3363)" This reverts commit ad6fbea22df6deaba31e146dddb456e4a5d5dd75. Revert "fix: LaTex formula render issue (#3353)" This reverts commit 3b2c84c4fee61b886c883c68801be3bc5a8584ad. Revert "chore: minor ui improvement (#3352)" This reverts commit 6dd387db2b5b9890f19d0c3505cf9cb770fd492f. Revert "fix: failed to relaunch app to update (#3351)" This reverts commit fcaf98a2fa4e674799602e8093914bcc04ced153. Revert "chore: add back GPU information to system monitoring bar (#3350)" This reverts commit 03455a91807c7af6c6325901997c6d7231d2cd0d. Revert "fix: empty model page not shown when delete all threads and models (#3343)" This reverts commit 9e29fcd69eb9085843896686806fd453a1285723. Revert "feat: allow user configure remote model from my model (#3348)" This reverts commit fdab8af057f80cf1ccaae0dc42c4e5161925f51e. Revert "chore: ui fix button outline for configure cloud model (#3347)" This reverts commit fe8ed1f26dc86ead92ffea4f36e2989caf7dad88. Revert "feat: move icon create new thread into top panel (#3346)" This reverts commit 46cb1b45b997181e2188f8dafb2fc0d0cc12ddcd. Revert "chore(UI): update experience model dropdown (#3342)" This reverts commit 8b44613015a907dc491113aeb99c963080424892. Revert "Chore/simple bug template and correct a copy (#3344)" This reverts commit 23cd5fd3979e7529811045da5c4912369bcc7532. Revert "chore(ui): fix alignment loader starter screen (#3338)" This reverts commit e9f5d2f837ce323b0851ea04cded913ab433388c. Revert "Increase retry upload to R2 to 5 times (#3337)" This reverts commit dcfb497934edc795955d971b6d391ee1e6309a03. Revert "fix: broken jan build - add log trace (jan.log) (#3336)" This reverts commit 77422c3a7ed240909942ac0d8c4b259af8d87a28. Revert "chore: disable quick ask (#3334)" This reverts commit 6e4b6b09ae009149f262d86d5b19bb8096267c19. 
Revert "fix: update legacy path (#3328)" This reverts commit 5eb112142c6431cfe0cdf11ce28810ca650a5427. Revert "chore: add cortex version (#3318)" This reverts commit 60587649c56a1f24272e763f25aa5b4042f7719a. Revert "fix: broken app due to incorrect api path (#3316)" This reverts commit 3de4eab2a0dfbf9f593d73b9dde6bca1d9df2279. Revert "feat: modal waiting cortex (#3306)" This reverts commit 1f5168d4af9080b867c19d334c398bf32e4f54b8. Revert "fix: refresh should not create new thread (#3314)" This reverts commit 624d07703c50ea332ed4eeac9dc3a26bc8190d08. Revert "fix: avoid lose title threads (#3307)" This reverts commit a4f5fda104c2d1e01ea72798f055e5b4e3cfd616. Revert "feat: change data folder (#3309)" This reverts commit b43242b9b24352c7f90995eccab753dede679616. Revert "feat: embed cortex into jan as a js module (#3305)" This reverts commit b348110fb73bd5f13c69f1b915168687dea776d0. Revert "fix: migration item in setting detail omit buttons (#3298)" This reverts commit 709204b2bc9d9ed08e2245cbb084482f5908ab3a. Revert "fix: merge gpu arch and os tensorrt models (#3299)" This reverts commit aa7dbdc9fa701debeee28d9c7eb4af6258685321. Revert "chore: update cortex new version (#3300)" This reverts commit 602097909d38b4874db8b9f19a729c65a0ac9619. Revert "fix: engine logo on model dropdown (#3291)" This reverts commit 8eb8611c28f6c4cdf1ab142a6e18c82bcc4c2073. Revert "fix: icon setting can close and open right panel (#3295)" This reverts commit be31e9315e2df5c483de3f46bd37740d277cfccd. Revert "fix: error while importing local model is not shown (#3294)" This reverts commit 26be941e8426462e1e3a28e5b9bf1f834f462f82. Revert "fix: add lower case quantization support (#3293)" This reverts commit 3135ccc27e894a4056f882cd25f0bf7e10e56f49. Revert "fix: onnx can't be selected in download model modal (#3283)" This reverts commit 2521e1db518e9e01493e89dcc98c181ccd2b48a2. Revert "feat: add chunk count (#3290)" This reverts commit bad481bf05aa38edcf553e1273f5d692a65c9225. 
Revert "fix: RAM always show 0% (#3287)" This reverts commit 2201e6c5f87538b953503937fe6b135fe1aa2d94. Revert "fix: remote engine should not allow reinit (#3284)" This reverts commit 98abff0da3467c090618233db12a25bfa4c1db69. Revert "chore": update minor UI (#3281)" This reverts commit 105a9aa1a1830648a32ae285f751b4078c8ac2b2. Revert "chore: update z-index tooltip (#3280)" This reverts commit 5a81865508c205ed8c54df209092553a0c40054f. Revert "feat: add nvidia engine (#3279)" This reverts commit 8372f30f0ee99606b123351e7bb62636c62c8b23. Revert "fix: migration wrong directory (#3278)" This reverts commit 7fb1354287677f577070ccb065ed3a5f9e5b9882. Revert "fix: clearer app loading prompt (#3275)" This reverts commit 44a6401000334b79b225ab6fd6afb79f9da4bd51. Revert "fix: allow user to reinit engine from settings page (#3277)" This reverts commit 57cf3c7b3d5bface785763d06813906ba6eab7c9. Revert "feat: enable copy over instructions (#3266)" This reverts commit 2074511067201f0addb9d274cc90d1e782f2bc1d. Revert "chore: toast message on model import fail with reason (#3276)" This reverts commit 3bebdfe67e1571c7414065a36d16eb5941115ee0. Revert "fix: should not let second instance terminate cortex (#3274)" This reverts commit d074a5a445b73ca195a49814a935300f9e895aaa. Revert "chore: remnove focus button (#3272)" This reverts commit 07fa79e71a401becdbc0f474c27b860654a8bd62. Revert "chore: update hub search result (#3273)" This reverts commit 10b4a9087af709d147b34f6c3ee63d2d3b75c77a. Revert "chore: temporary hidden import model (#3270)" This reverts commit db5d8aba454fd4cc1e07253ca4805d4b1b3e7fb2. Revert "fix: set cortex data folder path when starting jan (#3252)" This reverts commit 91c77eda78ecd251d480e58b853fe7b261f6de50. Revert "fix: remote model added manually does not shown in model drop down (#3261)" This reverts commit 224ca3f7cc25b2577ab123829907964b78b78aa8. 
Revert "feat: add more options for cortex popup (#3236)" This reverts commit 5e06ed8a122aaed9d68fbd04ce42b65bf8987e58. Revert "feat: manage cloud models from threads screen (#3223)" This reverts commit 37a3c4f844419e66cfe3f2a9ff79ba688538241f. Revert "chore: check the legacy incompatible message type (#3248)" This reverts commit c10caf8d7f1f9cf68551e41de5d54cd4450cf44a. Revert "chore: minor copy for grammar (#3235)" This reverts commit f0f23078f31f58e01ba27787d6926f5c1eb2ff0b. Revert "fix: add back normalize message function (#3234)" This reverts commit 83579df3a40ff61eac25975da8295fceaec679dc. Revert "chore: update conditional starter screen after cortex load (#3227)" This reverts commit 4d3a97f1dca9e6c3ea746586e8607541f2d1c0b3. Revert "fix: broken status parse due to empty category (#3233)" This reverts commit 68714eeaf9212a6fdacd5c6a48d8691db9cc99eb. Revert "feat: make scroll area type auto for make default visible scrollbar (#3220)" This reverts commit 13428d60e7d3ea6a24c0df8871ea13e2dec0d5fd. Revert "fix: update new api from cortex to support 0.5.0 (#3221)" This reverts commit ec9b5bf682a8676e132a08075b6ae03cf9e23132. Revert "feat: new starter screen (#3217)" This reverts commit e8ee694abd33b34112d2c7d09f8c03370c2d22cc. Revert "bump-cortex-0.5.0-1 (#3218)" This reverts commit 5369da78f5b83b1c8761cb48820ccf3111728a90. Revert "Deprecate Docker and K8s (#3219)" This reverts commit 7611a05c44982d07465bec57658d5bf965f30ad5. Revert "chore: set container max width for chat message and new hub screen (#3213)" This reverts commit 007daa71616268b0e741e7a890b319401e49a81e. Revert "feat: integrating cortex (#3001)" This reverts commit 101268f6f36df96b62982a9eeb8581ebe103a909.
265 lines
10 KiB
Python
265 lines
10 KiB
Python
#
|
|
# GGUF file reading/modification support. For API usage information,
|
|
# please see the files in scripts/ for some fairly simple examples.
|
|
#
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from collections import OrderedDict
|
|
from typing import Any, Literal, NamedTuple, TypeVar, Union
|
|
|
|
import numpy as np
|
|
import numpy.typing as npt
|
|
|
|
if __name__ == "__main__":
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Allow running file in package as a script.
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from gguf.constants import (
|
|
GGML_QUANT_SIZES,
|
|
GGUF_DEFAULT_ALIGNMENT,
|
|
GGUF_MAGIC,
|
|
GGUF_VERSION,
|
|
GGMLQuantizationType,
|
|
GGUFValueType,
|
|
)
|
|
|
|
|
|
# GGUF format versions this reader accepts: version 2 and the current
# GGUF_VERSION. GGUFReader.__init__ rejects any file whose version is not
# in this list.
READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
|
|
|
|
|
|
class ReaderField(NamedTuple):
    """One parsed GGUF entry, stored as raw array parts plus payload indexes."""

    # Byte offset at which this field begins.
    offset: int

    # Field name; it is not necessarily taken from the file data itself.
    name: str

    # Raw components of the field. Some types are made of several parts,
    # e.g. a string is a length part followed by the string data part.
    # NOTE: the list defaults in this class are single objects shared by
    # every instance created without that argument; never mutate them in
    # place.
    parts: list[npt.NDArray[Any]] = []

    # Positions inside `parts` that hold the actual data. For an array of
    # strings, this lists the index of each string's data part.
    data: list[int] = [-1]

    types: list[GGUFValueType] = []
|
|
|
|
|
|
class ReaderTensor(NamedTuple):
    """Description of one tensor in a GGUF file together with a view of its data."""

    # Tensor name decoded from the file.
    name: str
    # Quantization / storage type of the tensor.
    tensor_type: GGMLQuantizationType
    # Dimension sizes exactly as stored in the tensor info record. The reader
    # loads them as 64-bit unsigned integers, hence uint64 (not uint32).
    shape: npt.NDArray[np.uint64]
    # Total number of elements (product of all dimensions).
    n_elements: int
    # Size of the tensor data in bytes.
    n_bytes: int
    # Absolute offset of the tensor data within the file.
    data_offset: int
    # Array view over the tensor data.
    data: npt.NDArray[Any]
    # The raw tensor-info field this tensor was built from.
    field: ReaderField
|
|
|
|
|
|
class GGUFReader:
    """Memory-mapped reader for GGUF files.

    Constructing a reader maps the file with ``np.memmap`` and parses the
    header, the key/value metadata and the tensor info records. The results
    are exposed as:

    * ``fields``  - ``OrderedDict`` mapping field name to ``ReaderField``
    * ``tensors`` - list of ``ReaderTensor`` in file order

    All arrays handed out are views into the memory map, not copies.
    """

    # I - same as host, S - swapped
    byte_order: Literal['I', 'S'] = 'I'
    alignment: int = GGUF_DEFAULT_ALIGNMENT

    # Note: Internal helper, API may change.
    gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = {
        GGUFValueType.UINT8:   np.uint8,
        GGUFValueType.INT8:    np.int8,
        GGUFValueType.UINT16:  np.uint16,
        GGUFValueType.INT16:   np.int16,
        GGUFValueType.UINT32:  np.uint32,
        GGUFValueType.INT32:   np.int32,
        GGUFValueType.FLOAT32: np.float32,
        GGUFValueType.UINT64:  np.uint64,
        GGUFValueType.INT64:   np.int64,
        GGUFValueType.FLOAT64: np.float64,
        GGUFValueType.BOOL:    np.bool_,
    }

    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'):
        """Map the file at ``path`` and parse its header, metadata and tensors.

        ``mode`` is forwarded unchanged to ``np.memmap``.

        Raises:
            ValueError: if the magic number is wrong, the version is not in
                READER_SUPPORTED_VERSIONS, or ``general.alignment`` has an
                unexpected type.
            KeyError: if the file contains a duplicate field name.
        """
        self.data = np.memmap(path, mode = mode)
        offs = 0
        # The magic is always compared as a little-endian value.
        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
            raise ValueError('GGUF magic invalid')
        offs += 4
        temp_version = self._get(offs, np.uint32)
        if temp_version[0] & 65535 == 0:
            # If we get 0 here that means it's (probably) a GGUF file created for
            # the opposite byte order of the machine this script is running on.
            self.byte_order = 'S'
            # ndarray.newbyteorder() no longer exists in NumPy 2.0; viewing
            # through a byte-swapped dtype is the equivalent that works on
            # both NumPy 1.x and 2.x.
            temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
        version = temp_version[0]
        if version not in READER_SUPPORTED_VERSIONS:
            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
        self.fields: OrderedDict[str, ReaderField] = OrderedDict()
        self.tensors: list[ReaderTensor] = []
        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
        # Tensor count and key/value count are stored back to back.
        temp_counts = self._get(offs, np.uint64, 2)
        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
        tensor_count, kv_count = temp_counts
        offs = self._build_fields(offs, kv_count)
        offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)
        # A 'general.alignment' metadata entry overrides the default alignment.
        new_align = self.fields.get('general.alignment')
        if new_align is not None:
            if new_align.types != [GGUFValueType.UINT32]:
                raise ValueError('Bad type for general.alignment field')
            self.alignment = new_align.parts[-1][0]
        # Tensor data starts at the next multiple of the alignment.
        padding = offs % self.alignment
        if padding != 0:
            offs += self.alignment - padding
        self._build_tensors(offs, tensors_fields)

    _DT = TypeVar('_DT', bound = npt.DTypeLike)

    # Fetch a key/value metadata field by key.
    def get_field(self, key: str) -> Union[ReaderField, None]:
        """Return the field stored under ``key``, or None if there is none."""
        return self.fields.get(key, None)

    # Fetch a tensor from the list by index.
    def get_tensor(self, idx: int) -> ReaderTensor:
        """Return the tensor at position ``idx`` of ``self.tensors``."""
        return self.tensors[idx]

    def _get(
        self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
    ) -> npt.NDArray[Any]:
        """Return a view of ``count`` items of ``dtype`` starting at byte ``offset``.

        The view uses ``override_order`` when given, otherwise the reader's
        ``byte_order``.
        """
        count = int(count)
        itemsize = int(np.empty([], dtype = dtype).itemsize)
        end_offs = offset + itemsize * count
        arr = self.data[offset:end_offs].view(dtype = dtype)[:count]
        order = override_order or self.byte_order
        # Equivalent to the ndarray.newbyteorder() method that was removed in
        # NumPy 2.0: reinterpret the same bytes through a re-ordered dtype.
        return arr.view(arr.dtype.newbyteorder(order))

    def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
        """Register ``field`` under its name and return its size in bytes.

        Returns 0 instead of the size when ``skip_sum`` is true.

        Raises:
            KeyError: if a field with the same name is already registered.
        """
        if field.name in self.fields:
            raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
        self.fields[field.name] = field
        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

    def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
        """Read a string at ``offset``: a uint64 length followed by that many bytes.

        Returns the ``(length, data)`` arrays.
        """
        slen = self._get(offset, np.uint64)
        return slen, self._get(offset + 8, np.uint8, slen[0])

    def _get_field_parts(
        self, orig_offs: int, raw_type: int,
    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
        """Parse one value of type ``raw_type`` located at ``orig_offs``.

        Returns ``(size, parts, data_indexes, types)`` where ``size`` is the
        number of bytes consumed, ``parts`` are the raw arrays making up the
        value, ``data_indexes`` point at the payload entries of ``parts`` and
        ``types`` is the value type followed, for arrays, by the types
        reported for the first element.

        Raises:
            ValueError: if ``raw_type`` is not a valid or handled value type.
        """
        offs = orig_offs
        types: list[GGUFValueType] = []
        gtype = GGUFValueType(raw_type)
        types.append(gtype)
        # Handle strings.
        if gtype == GGUFValueType.STRING:
            sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
            size = sum(int(part.nbytes) for part in sparts)
            # Index 1 is the string data; index 0 is its length.
            return size, sparts, [1], types
        # Check if it's a simple scalar type.
        nptype = self.gguf_scalar_to_np.get(gtype)
        if nptype is not None:
            val = self._get(offs, nptype)
            return int(val.nbytes), [val], [0], types
        # Handle arrays.
        if gtype == GGUFValueType.ARRAY:
            # Layout: uint32 item type, uint64 item count, then the items.
            raw_itype = self._get(offs, np.uint32)
            offs += int(raw_itype.nbytes)
            alen = self._get(offs, np.uint64)
            offs += int(alen.nbytes)
            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
            data_idxs: list[int] = []
            for idx in range(alen[0]):
                curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
                # Every item shares one type, so record it only once.
                if idx == 0:
                    types += curr_types
                # Item indexes are relative to the item's own parts; shift
                # them to positions within the combined parts list.
                idxs_offs = len(aparts)
                aparts += curr_parts
                data_idxs += (curr_idx + idxs_offs for curr_idx in curr_idxs)
                offs += curr_size
            return offs - orig_offs, aparts, data_idxs, types
        # We can't deal with this one.
        raise ValueError(f'Unknown/unhandled field type {gtype}')

    def _get_tensor(self, orig_offs: int) -> ReaderField:
        """Parse the tensor info record at ``orig_offs`` into a ReaderField.

        The parts are, in order: name length, name data, number of
        dimensions, dimensions, raw dtype and data offset.
        """
        offs = orig_offs
        name_len, name_data = self._get_str(offs)
        offs += int(name_len.nbytes + name_data.nbytes)
        n_dims = self._get(offs, np.uint32)
        offs += int(n_dims.nbytes)
        dims = self._get(offs, np.uint64, n_dims[0])
        offs += int(dims.nbytes)
        raw_dtype = self._get(offs, np.uint32)
        offs += int(raw_dtype.nbytes)
        offset_tensor = self._get(offs, np.uint64)
        offs += int(offset_tensor.nbytes)
        return ReaderField(
            orig_offs,
            str(bytes(name_data), encoding = 'utf-8'),
            [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
            [1, 3, 4, 5],
        )

    def _build_fields(self, offs: int, count: int) -> int:
        """Parse ``count`` key/value entries starting at ``offs`` into ``self.fields``.

        Returns the offset just past the last entry.
        """
        for _ in range(count):
            orig_offs = offs
            kv_klen, kv_kdata = self._get_str(offs)
            offs += int(kv_klen.nbytes + kv_kdata.nbytes)
            raw_kv_type = self._get(offs, np.uint32)
            offs += int(raw_kv_type.nbytes)
            parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
            # Value parts are appended after the key parts, so value data
            # indexes must be shifted by the number of key parts.
            idxs_offs = len(parts)
            field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
            parts += field_parts
            self._push_field(ReaderField(
                orig_offs,
                str(bytes(kv_kdata), encoding = 'utf-8'),
                parts,
                [idx + idxs_offs for idx in field_idxs],
                field_types,
            ), skip_sum = True)
            offs += field_size
        return offs

    def _build_tensors_fields(self, offs: int, count: int) -> tuple[int, list[ReaderField]]:
        """Parse ``count`` tensor info records starting at ``offs``.

        Returns the offset just past the last record and the parsed fields.
        """
        tensor_fields = []
        for _ in range(count):
            field = self._get_tensor(offs)
            offs += sum(int(part.nbytes) for part in field.parts)
            tensor_fields.append(field)
        return offs, tensor_fields

    def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        """Build ``self.tensors`` from tensor info ``fields``.

        ``start_offs`` is the offset of the tensor data section; each
        tensor's stored offset is relative to it.
        """
        tensors = []
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            # Convert to Python ints up front so the size arithmetic below is
            # exact and not subject to NumPy scalar type promotion, and so
            # ReaderTensor receives the plain ints its annotations promise.
            n_elems = int(np.prod(dims))
            block_size, type_size = GGML_QUANT_SIZES[ggml_type]
            n_bytes = n_elems * type_size // block_size
            data_offs = int(start_offs) + int(offset_tensor[0])
            item_type: npt.DTypeLike
            if ggml_type == GGMLQuantizationType.F32:
                item_count = n_elems
                item_type = np.float32
            elif ggml_type == GGMLQuantizationType.F16:
                item_count = n_elems
                item_type = np.float16
            else:
                # Every other type is exposed as its raw bytes.
                item_count = n_bytes
                item_type = np.uint8
            tensors.append(ReaderTensor(
                name = str(bytes(name_data), encoding = 'utf-8'),
                tensor_type = ggml_type,
                shape = dims,
                n_elements = n_elems,
                n_bytes = n_bytes,
                data_offset = data_offs,
                data = self._get(data_offs, item_type, item_count),
                field = field,
            ))
        self.tensors = tensors
|