This reverts commit ad6fbea22df6deaba31e146dddb456e4a5d5dd75 Revert "chore: add engine logo from local instead of metadata logo (#3363)" This reverts commit ad6fbea22df6deaba31e146dddb456e4a5d5dd75. Revert "fix: LaTex formula render issue (#3353)" This reverts commit 3b2c84c4fee61b886c883c68801be3bc5a8584ad. Revert "chore: minor ui improvement (#3352)" This reverts commit 6dd387db2b5b9890f19d0c3505cf9cb770fd492f. Revert "fix: failed to relaunch app to update (#3351)" This reverts commit fcaf98a2fa4e674799602e8093914bcc04ced153. Revert "chore: add back GPU information to system monitoring bar (#3350)" This reverts commit 03455a91807c7af6c6325901997c6d7231d2cd0d. Revert "fix: empty model page not shown when delete all threads and models (#3343)" This reverts commit 9e29fcd69eb9085843896686806fd453a1285723. Revert "feat: allow user configure remote model from my model (#3348)" This reverts commit fdab8af057f80cf1ccaae0dc42c4e5161925f51e. Revert "chore: ui fix button outline for configure cloud model (#3347)" This reverts commit fe8ed1f26dc86ead92ffea4f36e2989caf7dad88. Revert "feat: move icon create new thread into top panel (#3346)" This reverts commit 46cb1b45b997181e2188f8dafb2fc0d0cc12ddcd. Revert "chore(UI): update experience model dropdown (#3342)" This reverts commit 8b44613015a907dc491113aeb99c963080424892. Revert "Chore/simple bug template and correct a copy (#3344)" This reverts commit 23cd5fd3979e7529811045da5c4912369bcc7532. Revert "chore(ui): fix alignment loader starter screen (#3338)" This reverts commit e9f5d2f837ce323b0851ea04cded913ab433388c. Revert "Increase retry upload to R2 to 5 times (#3337)" This reverts commit dcfb497934edc795955d971b6d391ee1e6309a03. Revert "fix: broken jan build - add log trace (jan.log) (#3336)" This reverts commit 77422c3a7ed240909942ac0d8c4b259af8d87a28. Revert "chore: disable quick ask (#3334)" This reverts commit 6e4b6b09ae009149f262d86d5b19bb8096267c19. 
Revert "fix: update legacy path (#3328)" This reverts commit 5eb112142c6431cfe0cdf11ce28810ca650a5427. Revert "chore: add cortex version (#3318)" This reverts commit 60587649c56a1f24272e763f25aa5b4042f7719a. Revert "fix: broken app due to incorrect api path (#3316)" This reverts commit 3de4eab2a0dfbf9f593d73b9dde6bca1d9df2279. Revert "feat: modal waiting cortex (#3306)" This reverts commit 1f5168d4af9080b867c19d334c398bf32e4f54b8. Revert "fix: refresh should not create new thread (#3314)" This reverts commit 624d07703c50ea332ed4eeac9dc3a26bc8190d08. Revert "fix: avoid lose title threads (#3307)" This reverts commit a4f5fda104c2d1e01ea72798f055e5b4e3cfd616. Revert "feat: change data folder (#3309)" This reverts commit b43242b9b24352c7f90995eccab753dede679616. Revert "feat: embed cortex into jan as a js module (#3305)" This reverts commit b348110fb73bd5f13c69f1b915168687dea776d0. Revert "fix: migration item in setting detail omit buttons (#3298)" This reverts commit 709204b2bc9d9ed08e2245cbb084482f5908ab3a. Revert "fix: merge gpu arch and os tensorrt models (#3299)" This reverts commit aa7dbdc9fa701debeee28d9c7eb4af6258685321. Revert "chore: update cortex new version (#3300)" This reverts commit 602097909d38b4874db8b9f19a729c65a0ac9619. Revert "fix: engine logo on model dropdown (#3291)" This reverts commit 8eb8611c28f6c4cdf1ab142a6e18c82bcc4c2073. Revert "fix: icon setting can close and open right panel (#3295)" This reverts commit be31e9315e2df5c483de3f46bd37740d277cfccd. Revert "fix: error while importing local model is not shown (#3294)" This reverts commit 26be941e8426462e1e3a28e5b9bf1f834f462f82. Revert "fix: add lower case quantization support (#3293)" This reverts commit 3135ccc27e894a4056f882cd25f0bf7e10e56f49. Revert "fix: onnx can't be selected in download model modal (#3283)" This reverts commit 2521e1db518e9e01493e89dcc98c181ccd2b48a2. Revert "feat: add chunk count (#3290)" This reverts commit bad481bf05aa38edcf553e1273f5d692a65c9225. 
Revert "fix: RAM always show 0% (#3287)" This reverts commit 2201e6c5f87538b953503937fe6b135fe1aa2d94. Revert "fix: remote engine should not allow reinit (#3284)" This reverts commit 98abff0da3467c090618233db12a25bfa4c1db69. Revert "chore": update minor UI (#3281)" This reverts commit 105a9aa1a1830648a32ae285f751b4078c8ac2b2. Revert "chore: update z-index tooltip (#3280)" This reverts commit 5a81865508c205ed8c54df209092553a0c40054f. Revert "feat: add nvidia engine (#3279)" This reverts commit 8372f30f0ee99606b123351e7bb62636c62c8b23. Revert "fix: migration wrong directory (#3278)" This reverts commit 7fb1354287677f577070ccb065ed3a5f9e5b9882. Revert "fix: clearer app loading prompt (#3275)" This reverts commit 44a6401000334b79b225ab6fd6afb79f9da4bd51. Revert "fix: allow user to reinit engine from settings page (#3277)" This reverts commit 57cf3c7b3d5bface785763d06813906ba6eab7c9. Revert "feat: enable copy over instructions (#3266)" This reverts commit 2074511067201f0addb9d274cc90d1e782f2bc1d. Revert "chore: toast message on model import fail with reason (#3276)" This reverts commit 3bebdfe67e1571c7414065a36d16eb5941115ee0. Revert "fix: should not let second instance terminate cortex (#3274)" This reverts commit d074a5a445b73ca195a49814a935300f9e895aaa. Revert "chore: remnove focus button (#3272)" This reverts commit 07fa79e71a401becdbc0f474c27b860654a8bd62. Revert "chore: update hub search result (#3273)" This reverts commit 10b4a9087af709d147b34f6c3ee63d2d3b75c77a. Revert "chore: temporary hidden import model (#3270)" This reverts commit db5d8aba454fd4cc1e07253ca4805d4b1b3e7fb2. Revert "fix: set cortex data folder path when starting jan (#3252)" This reverts commit 91c77eda78ecd251d480e58b853fe7b261f6de50. Revert "fix: remote model added manually does not shown in model drop down (#3261)" This reverts commit 224ca3f7cc25b2577ab123829907964b78b78aa8. 
Revert "feat: add more options for cortex popup (#3236)" This reverts commit 5e06ed8a122aaed9d68fbd04ce42b65bf8987e58. Revert "feat: manage cloud models from threads screen (#3223)" This reverts commit 37a3c4f844419e66cfe3f2a9ff79ba688538241f. Revert "chore: check the legacy incompatible message type (#3248)" This reverts commit c10caf8d7f1f9cf68551e41de5d54cd4450cf44a. Revert "chore: minor copy for grammar (#3235)" This reverts commit f0f23078f31f58e01ba27787d6926f5c1eb2ff0b. Revert "fix: add back normalize message function (#3234)" This reverts commit 83579df3a40ff61eac25975da8295fceaec679dc. Revert "chore: update conditional starter screen after cortex load (#3227)" This reverts commit 4d3a97f1dca9e6c3ea746586e8607541f2d1c0b3. Revert "fix: broken status parse due to empty category (#3233)" This reverts commit 68714eeaf9212a6fdacd5c6a48d8691db9cc99eb. Revert "feat: make scroll area type auto for make default visible scrollbar (#3220)" This reverts commit 13428d60e7d3ea6a24c0df8871ea13e2dec0d5fd. Revert "fix: update new api from cortex to support 0.5.0 (#3221)" This reverts commit ec9b5bf682a8676e132a08075b6ae03cf9e23132. Revert "feat: new starter screen (#3217)" This reverts commit e8ee694abd33b34112d2c7d09f8c03370c2d22cc. Revert "bump-cortex-0.5.0-1 (#3218)" This reverts commit 5369da78f5b83b1c8761cb48820ccf3111728a90. Revert "Deprecate Docker and K8s (#3219)" This reverts commit 7611a05c44982d07465bec57658d5bf965f30ad5. Revert "chore: set container max width for chat message and new hub screen (#3213)" This reverts commit 007daa71616268b0e741e7a890b319401e49a81e. Revert "feat: integrating cortex (#3001)" This reverts commit 101268f6f36df96b62982a9eeb8581ebe103a909.
428 lines
16 KiB
Python
428 lines
16 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
import shutil
|
|
import struct
|
|
import tempfile
|
|
from enum import Enum, auto
|
|
from io import BufferedWriter
|
|
from typing import IO, Any, Sequence
|
|
|
|
import numpy as np
|
|
|
|
from .constants import (
|
|
GGUF_DEFAULT_ALIGNMENT,
|
|
GGUF_MAGIC,
|
|
GGUF_VERSION,
|
|
GGMLQuantizationType,
|
|
GGUFEndian,
|
|
GGUFValueType,
|
|
Keys,
|
|
RopeScalingType,
|
|
TokenType,
|
|
)
|
|
|
|
|
|
class WriterState(Enum):
    """Stage of the output file, advanced by the writer after each section is written."""

    # Nothing has been written to the output file yet.
    EMPTY = 1
    # The fixed-size header has been written.
    HEADER = 2
    # The key/value metadata section has been written.
    KV_DATA = 3
    # The tensor-info section has been written.
    TI_DATA = 4
|
|
|
|
|
|
class GGUFWriter:
    """Build a GGUF file section by section.

    Metadata (``add_*``) and tensors (``add_tensor`` / ``add_tensor_info``) are
    first accumulated in memory (or in a spooled temporary file for tensor
    data).  The sections are then written in a fixed order, enforced through
    ``self.state``:

    1. ``write_header_to_file``   (requires ``WriterState.EMPTY``)
    2. ``write_kv_data_to_file``  (requires ``WriterState.HEADER``)
    3. ``write_tensors_to_file``  (requires ``WriterState.KV_DATA``)
    4. ``close``

    Every ``add_*`` metadata call either records the complete key/value pair
    or, if the value is rejected, leaves the pending metadata untouched.
    """

    fout: BufferedWriter
    temp_file: tempfile.SpooledTemporaryFile[bytes] | None
    tensors: list[np.ndarray[Any, Any]]
    # ``struct`` format character for each fixed-size scalar value type.
    _simple_value_packing = {
        GGUFValueType.UINT8: "B",
        GGUFValueType.INT8: "b",
        GGUFValueType.UINT16: "H",
        GGUFValueType.INT16: "h",
        GGUFValueType.UINT32: "I",
        GGUFValueType.INT32: "i",
        GGUFValueType.FLOAT32: "f",
        GGUFValueType.UINT64: "Q",
        GGUFValueType.INT64: "q",
        GGUFValueType.FLOAT64: "d",
        GGUFValueType.BOOL: "?",
    }

    def __init__(
        self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True,
        endianess: GGUFEndian = GGUFEndian.LITTLE,
    ):
        """Open ``path`` for binary writing and record the architecture key.

        Args:
            path: Destination file; it is opened with mode ``"wb"``.
            arch: Architecture name, stored as metadata and substituted into
                the ``{arch}`` placeholder of architecture-specific keys.
            use_temp_file: When true, tensor data passed to ``add_tensor`` is
                staged in a spooled temporary file instead of being kept as
                arrays in ``self.tensors``.
            endianess: Byte order used when packing values and tensor data.
        """
        self.fout = open(path, "wb")
        self.arch = arch
        self.endianess = endianess
        self.offset_tensor = 0
        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
        self.kv_data = bytearray()
        self.kv_data_count = 0
        self.ti_data = bytearray()
        self.ti_data_count = 0
        self.use_temp_file = use_temp_file
        self.temp_file = None
        self.tensors = []
        endian_name = "Big" if self.endianess == GGUFEndian.BIG else "Little"
        print(f"gguf: This GGUF file is for {endian_name} Endian only")
        self.state = WriterState.EMPTY

        self.add_architecture()

    # ------------------------------------------------------------------
    # Section writers
    # ------------------------------------------------------------------

    def write_header_to_file(self) -> None:
        """Write magic, version, tensor count and key/value count, in that order.

        Raises:
            ValueError: If anything has already been written to the file.
        """
        if self.state is not WriterState.EMPTY:
            raise ValueError(f'Expected output file to be empty, got {self.state}')

        # The magic is always packed little-endian, whatever the file's byte order.
        self._write_packed("<I", GGUF_MAGIC, skip_pack_prefix = True)
        self._write_packed("I", GGUF_VERSION)
        self._write_packed("Q", self.ti_data_count)
        self._write_packed("Q", self.kv_data_count)
        self.flush()
        self.state = WriterState.HEADER

    def write_kv_data_to_file(self) -> None:
        """Write the accumulated key/value metadata.

        Raises:
            ValueError: If the header has not been written immediately before.
        """
        if self.state is not WriterState.HEADER:
            raise ValueError(f'Expected output file to contain the header, got {self.state}')

        self.fout.write(self.kv_data)
        self.flush()
        self.state = WriterState.KV_DATA

    def write_ti_data_to_file(self) -> None:
        """Write the accumulated tensor-info records.

        Raises:
            ValueError: If the key/value data has not been written immediately before.
        """
        if self.state is not WriterState.KV_DATA:
            raise ValueError(f'Expected output file to contain KV data, got {self.state}')

        self.fout.write(self.ti_data)
        self.flush()
        self.state = WriterState.TI_DATA

    # ------------------------------------------------------------------
    # Generic key/value encoding
    # ------------------------------------------------------------------

    def add_key(self, key: str) -> None:
        """Append ``key`` as a length-prefixed string without a type tag."""
        self.add_val(key, GGUFValueType.STRING, add_vtype=False)

    def add_uint8(self, key: str, val: int) -> None:
        """Record ``key`` with a UINT8 value."""
        self._add_kv(key, val, GGUFValueType.UINT8)

    def add_int8(self, key: str, val: int) -> None:
        """Record ``key`` with an INT8 value."""
        self._add_kv(key, val, GGUFValueType.INT8)

    def add_uint16(self, key: str, val: int) -> None:
        """Record ``key`` with a UINT16 value."""
        self._add_kv(key, val, GGUFValueType.UINT16)

    def add_int16(self, key: str, val: int) -> None:
        """Record ``key`` with an INT16 value."""
        self._add_kv(key, val, GGUFValueType.INT16)

    def add_uint32(self, key: str, val: int) -> None:
        """Record ``key`` with a UINT32 value."""
        self._add_kv(key, val, GGUFValueType.UINT32)

    def add_int32(self, key: str, val: int) -> None:
        """Record ``key`` with an INT32 value."""
        self._add_kv(key, val, GGUFValueType.INT32)

    def add_float32(self, key: str, val: float) -> None:
        """Record ``key`` with a FLOAT32 value."""
        self._add_kv(key, val, GGUFValueType.FLOAT32)

    def add_uint64(self, key: str, val: int) -> None:
        """Record ``key`` with a UINT64 value."""
        self._add_kv(key, val, GGUFValueType.UINT64)

    def add_int64(self, key: str, val: int) -> None:
        """Record ``key`` with an INT64 value."""
        self._add_kv(key, val, GGUFValueType.INT64)

    def add_float64(self, key: str, val: float) -> None:
        """Record ``key`` with a FLOAT64 value."""
        self._add_kv(key, val, GGUFValueType.FLOAT64)

    def add_bool(self, key: str, val: bool) -> None:
        """Record ``key`` with a BOOL value."""
        self._add_kv(key, val, GGUFValueType.BOOL)

    def add_string(self, key: str, val: str) -> None:
        """Record ``key`` with a STRING value.

        A falsy ``val`` (empty string or ``None``) is skipped entirely: neither
        the key nor a value is recorded.
        """
        if not val:
            return
        self._add_kv(key, val, GGUFValueType.STRING)

    def add_array(self, key: str, val: Sequence[Any]) -> None:
        """Record ``key`` with an ARRAY value.

        Raises:
            ValueError: If ``val`` is not a sequence, is empty, or mixes item types.
        """
        if not isinstance(val, Sequence):
            raise ValueError("Value must be a sequence for array type")

        self._add_kv(key, val, GGUFValueType.ARRAY)

    def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True) -> None:
        """Append one encoded value to the pending key/value data.

        Args:
            val: The value to encode.
            vtype: Its GGUF type; derived with ``GGUFValueType.get_type`` when ``None``.
            add_vtype: When true, the value is preceded by its type tag and the
                key/value counter is incremented.

        Raises:
            ValueError: If the type is unsupported, or an array is empty or
                not homogeneous.  Nothing is appended in that case.
        """
        # Encode into a scratch buffer first so a rejected value cannot leave
        # a half-written entry (or a bumped counter) behind.
        scratch = bytearray()
        self._encode_val(scratch, val, vtype, add_vtype)
        self.kv_data += scratch
        if add_vtype:
            self.kv_data_count += 1

    def _add_kv(self, key: str, val: Any, vtype: GGUFValueType) -> None:
        """Encode ``key`` and ``val`` together and commit them only if both succeed."""
        scratch = bytearray()
        self._encode_val(scratch, key, GGUFValueType.STRING, add_vtype=False)
        self._encode_val(scratch, val, vtype)
        self.kv_data += scratch
        self.kv_data_count += 1

    def _encode_val(
        self, out: bytearray, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True,
    ) -> None:
        """Append the encoding of ``val`` to ``out`` without touching writer state."""
        if vtype is None:
            vtype = GGUFValueType.get_type(val)

        if add_vtype:
            out.extend(self._pack("I", vtype))

        pack_fmt = self._simple_value_packing.get(vtype)
        if pack_fmt is not None:
            # BOOL is packed without a byte-order prefix.
            out.extend(self._pack(pack_fmt, val, skip_pack_prefix = vtype == GGUFValueType.BOOL))
        elif vtype == GGUFValueType.STRING:
            encoded_val = val.encode("utf8") if isinstance(val, str) else val
            out.extend(self._pack("Q", len(encoded_val)))
            out.extend(encoded_val)
        elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and val:
            ltype = GGUFValueType.get_type(val[0])
            if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                raise ValueError("All items in a GGUF array should be of the same type")
            # Array layout: item type, item count, then the untagged items.
            out.extend(self._pack("I", ltype))
            out.extend(self._pack("Q", len(val)))
            for item in val:
                self._encode_val(out, item, ltype, add_vtype=False)
        else:
            raise ValueError("Invalid GGUF metadata value type or value")

    # ------------------------------------------------------------------
    # Tensors
    # ------------------------------------------------------------------

    @staticmethod
    def ggml_pad(x: int, n: int) -> int:
        """Round ``x`` up to the next multiple of ``n``."""
        return ((x + n - 1) // n) * n

    def add_tensor_info(
        self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32],
        tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
        """Append a tensor-info record and reserve aligned space for the tensor data.

        Args:
            name: Tensor name, stored UTF-8 encoded with a length prefix.
            tensor_shape: Dimensions; they are written in reverse order.
            tensor_dtype: NumPy dtype, used to choose F32/F16 when ``raw_dtype`` is ``None``.
            tensor_nbytes: Size of the tensor data in bytes.
            raw_dtype: Explicit GGML type that overrides the dtype-derived one.

        Raises:
            ValueError: If the file is no longer empty, or ``raw_dtype`` is
                ``None`` and ``tensor_dtype`` is neither float32 nor float16.
        """
        if self.state is not WriterState.EMPTY:
            raise ValueError(f'Expected output file to be empty, got {self.state}')

        if raw_dtype is None and tensor_dtype not in (np.float32, np.float16):
            raise ValueError("Only F32 and F16 tensors are supported for now")

        if raw_dtype is None:
            dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
        else:
            dtype = raw_dtype

        # Build the record separately so a packing error cannot leave a
        # truncated entry in ``ti_data``.
        encoded_name = name.encode("utf8")
        info = bytearray()
        info += self._pack("Q", len(encoded_name))
        info += encoded_name
        info += self._pack("I", len(tensor_shape))
        for dim in reversed(tensor_shape):
            info += self._pack("Q", dim)
        info += self._pack("I", dtype)
        info += self._pack("Q", self.offset_tensor)

        self.ti_data += info
        self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
        self.ti_data_count += 1

    def add_tensor(
        self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
        raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
        """Register a tensor and stage its data for ``write_tensors_to_file``.

        The data goes to the spooled temporary file when ``use_temp_file`` is
        set, otherwise the array itself is kept in ``self.tensors``.

        NOTE: for big-endian output the array is byte-swapped *in place*, so
        the caller's array is modified.

        Raises:
            ValueError: Propagated from ``add_tensor_info``; in that case the
                tensor is left untouched and nothing is staged.
        """
        shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
        # Validate and record first: an in-place byteswap changes neither
        # dtype nor nbytes, and a rejected tensor must not be mutated.
        self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)

        if self.endianess == GGUFEndian.BIG:
            tensor.byteswap(inplace=True)

        if self.use_temp_file and self.temp_file is None:
            self.temp_file = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)

        if self.temp_file is None:
            self.tensors.append(tensor)
            return

        tensor.tofile(self.temp_file)
        self.write_padding(self.temp_file, tensor.nbytes)

    def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
        """Write the zero bytes needed to pad ``n`` up to a multiple of the alignment.

        ``align`` defaults to ``self.data_alignment``.
        """
        pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
        if pad != 0:
            fp.write(bytes(pad))

    def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
        """Write one tensor directly to the output file, padded before and after.

        NOTE: for big-endian output the array is byte-swapped *in place*.

        Raises:
            ValueError: If the tensor-info section has not been written yet.
        """
        if self.state is not WriterState.TI_DATA:
            raise ValueError(f'Expected output file to contain tensor info, got {self.state}')

        if self.endianess == GGUFEndian.BIG:
            tensor.byteswap(inplace=True)
        # Align the current file position before the data starts.
        self.write_padding(self.fout, self.fout.tell())
        tensor.tofile(self.fout)
        self.write_padding(self.fout, tensor.nbytes)

    def write_tensors_to_file(self) -> None:
        """Write the tensor-info section followed by all staged tensor data.

        Raises:
            ValueError: Propagated from ``write_ti_data_to_file`` when the
                key/value data has not been written yet.
        """
        self.write_ti_data_to_file()

        self.write_padding(self.fout, self.fout.tell())

        if self.temp_file is None:
            # Pop as we go so each array can be released once it is written.
            while self.tensors:
                tensor = self.tensors.pop(0)
                tensor.tofile(self.fout)
                self.write_padding(self.fout, tensor.nbytes)
            self.flush()
            return

        self.temp_file.seek(0)

        shutil.copyfileobj(self.temp_file, self.fout)
        self.flush()
        self.temp_file.close()

    def flush(self) -> None:
        """Flush the output file's buffer."""
        self.fout.flush()

    def close(self) -> None:
        """Close the output file and the temporary tensor file, if one exists."""
        try:
            # Covers the case where tensors were staged but never written;
            # closing an already-closed file is a no-op.
            if self.temp_file is not None:
                self.temp_file.close()
        finally:
            self.fout.close()

    # ------------------------------------------------------------------
    # General metadata
    # ------------------------------------------------------------------

    def add_architecture(self) -> None:
        """Record ``self.arch`` under ``Keys.General.ARCHITECTURE``."""
        self.add_string(Keys.General.ARCHITECTURE, self.arch)

    def add_author(self, author: str) -> None:
        """Record the author string."""
        self.add_string(Keys.General.AUTHOR, author)

    def add_tensor_data_layout(self, layout: str) -> None:
        """Record the architecture-specific tensor data layout string."""
        self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)

    def add_url(self, url: str) -> None:
        """Record the URL string."""
        self.add_string(Keys.General.URL, url)

    def add_description(self, description: str) -> None:
        """Record the description string."""
        self.add_string(Keys.General.DESCRIPTION, description)

    def add_source_url(self, url: str) -> None:
        """Record the source URL string."""
        self.add_string(Keys.General.SOURCE_URL, url)

    def add_source_hf_repo(self, repo: str) -> None:
        """Record the source repository string under ``Keys.General.SOURCE_HF_REPO``."""
        self.add_string(Keys.General.SOURCE_HF_REPO, repo)

    def add_file_type(self, ftype: int) -> None:
        """Record the file type as a uint32."""
        self.add_uint32(Keys.General.FILE_TYPE, ftype)

    def add_name(self, name: str) -> None:
        """Record the name string."""
        self.add_string(Keys.General.NAME, name)

    def add_quantization_version(self, quantization_version: GGMLQuantizationType) -> None:
        """Record the quantization version as a uint32."""
        self.add_uint32(
            Keys.General.QUANTIZATION_VERSION, quantization_version)

    def add_custom_alignment(self, alignment: int) -> None:
        """Record ``alignment`` and use it for subsequent tensor padding."""
        # Record the key first so a rejected value does not change the
        # alignment used by later padding calculations.
        self.add_uint32(Keys.General.ALIGNMENT, alignment)
        self.data_alignment = alignment

    # ------------------------------------------------------------------
    # Architecture-specific metadata (keys are formatted with ``self.arch``)
    # ------------------------------------------------------------------

    def add_context_length(self, length: int) -> None:
        """Record the context length as a uint32."""
        self.add_uint32(Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch), length)

    def add_embedding_length(self, length: int) -> None:
        """Record the embedding length as a uint32."""
        self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)

    def add_block_count(self, length: int) -> None:
        """Record the block count as a uint32."""
        self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)

    def add_feed_forward_length(self, length: int) -> None:
        """Record the feed-forward length as a uint32."""
        self.add_uint32(Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)

    def add_parallel_residual(self, use: bool) -> None:
        """Record the parallel-residual flag as a bool."""
        self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)

    def add_head_count(self, count: int) -> None:
        """Record the attention head count as a uint32."""
        self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)

    def add_head_count_kv(self, count: int) -> None:
        """Record the attention KV head count as a uint32."""
        self.add_uint32(Keys.Attention.HEAD_COUNT_KV.format(arch=self.arch), count)

    def add_key_length(self, length: int) -> None:
        """Record the attention key length as a uint32."""
        self.add_uint32(Keys.Attention.KEY_LENGTH.format(arch=self.arch), length)

    def add_value_length(self, length: int) -> None:
        """Record the attention value length as a uint32."""
        self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)

    def add_max_alibi_bias(self, bias: float) -> None:
        """Record the maximum ALiBi bias as a float32."""
        self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)

    def add_clamp_kqv(self, value: float) -> None:
        """Record the KQV clamp value as a float32."""
        self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)

    def add_expert_count(self, count: int) -> None:
        """Record the expert count as a uint32."""
        self.add_uint32(Keys.LLM.EXPERT_COUNT.format(arch=self.arch), count)

    def add_expert_used_count(self, count: int) -> None:
        """Record the used-expert count as a uint32."""
        self.add_uint32(Keys.LLM.EXPERT_USED_COUNT.format(arch=self.arch), count)

    def add_layer_norm_eps(self, value: float) -> None:
        """Record the layer-norm epsilon as a float32."""
        self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)

    def add_layer_norm_rms_eps(self, value: float) -> None:
        """Record the RMS layer-norm epsilon as a float32."""
        self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)

    def add_rope_dimension_count(self, count: int) -> None:
        """Record the RoPE dimension count as a uint32."""
        self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)

    def add_rope_freq_base(self, value: float) -> None:
        """Record the RoPE frequency base as a float32."""
        self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)

    def add_rope_scaling_type(self, value: RopeScalingType) -> None:
        """Record the RoPE scaling type, stored as the enum member's ``value`` string."""
        self.add_string(Keys.Rope.SCALING_TYPE.format(arch=self.arch), value.value)

    def add_rope_scaling_factor(self, value: float) -> None:
        """Record the RoPE scaling factor as a float32."""
        self.add_float32(Keys.Rope.SCALING_FACTOR.format(arch=self.arch), value)

    def add_rope_scaling_orig_ctx_len(self, value: int) -> None:
        """Record the RoPE scaling original context length as a uint32."""
        self.add_uint32(Keys.Rope.SCALING_ORIG_CTX_LEN.format(arch=self.arch), value)

    def add_rope_scaling_finetuned(self, value: bool) -> None:
        """Record the RoPE scaling fine-tuned flag as a bool."""
        self.add_bool(Keys.Rope.SCALING_FINETUNED.format(arch=self.arch), value)

    # ------------------------------------------------------------------
    # Tokenizer metadata
    # ------------------------------------------------------------------

    def add_tokenizer_model(self, model: str) -> None:
        """Record the tokenizer model string."""
        self.add_string(Keys.Tokenizer.MODEL, model)

    def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
        """Record the token list as an array."""
        self.add_array(Keys.Tokenizer.LIST, tokens)

    def add_token_merges(self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
        """Record the token merges as an array."""
        self.add_array(Keys.Tokenizer.MERGES, merges)

    def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
        """Record the token types as an array."""
        self.add_array(Keys.Tokenizer.TOKEN_TYPE, types)

    def add_token_scores(self, scores: Sequence[float]) -> None:
        """Record the token scores as an array."""
        self.add_array(Keys.Tokenizer.SCORES, scores)

    def add_bos_token_id(self, id: int) -> None:
        """Record the BOS token id as a uint32."""
        self.add_uint32(Keys.Tokenizer.BOS_ID, id)

    def add_eos_token_id(self, id: int) -> None:
        """Record the EOS token id as a uint32."""
        self.add_uint32(Keys.Tokenizer.EOS_ID, id)

    def add_unk_token_id(self, id: int) -> None:
        """Record the UNK token id as a uint32."""
        self.add_uint32(Keys.Tokenizer.UNK_ID, id)

    def add_sep_token_id(self, id: int) -> None:
        """Record the SEP token id as a uint32."""
        self.add_uint32(Keys.Tokenizer.SEP_ID, id)

    def add_pad_token_id(self, id: int) -> None:
        """Record the PAD token id as a uint32."""
        self.add_uint32(Keys.Tokenizer.PAD_ID, id)

    def add_add_bos_token(self, value: bool) -> None:
        """Record the add-BOS flag as a bool."""
        self.add_bool(Keys.Tokenizer.ADD_BOS, value)

    def add_add_eos_token(self, value: bool) -> None:
        """Record the add-EOS flag as a bool."""
        self.add_bool(Keys.Tokenizer.ADD_EOS, value)

    def add_add_space_prefix(self, value: bool) -> None:
        """Record the add-space-prefix flag as a bool."""
        self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)

    def add_chat_template(self, value: str) -> None:
        """Record the chat template string."""
        self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)

    # ------------------------------------------------------------------
    # Low-level packing
    # ------------------------------------------------------------------

    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
        """Pack ``value`` with ``struct`` using the writer's byte order.

        When ``skip_pack_prefix`` is true, ``fmt`` is used as given, so the
        caller controls (or omits) the byte-order prefix.
        """
        pack_prefix = ''
        if not skip_pack_prefix:
            pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
        return struct.pack(f'{pack_prefix}{fmt}', value)

    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
        """Pack ``value`` (see ``_pack``) and write it straight to the output file."""
        self.fout.write(self._pack(fmt, value, skip_pack_prefix))
|