Louis a699f8f32f
Revert "Jan integrates Cortex"
This reverts commit ad6fbea22df6deaba31e146dddb456e4a5d5dd75

Revert "chore: add engine logo from local instead of metadata logo (#3363)"

This reverts commit ad6fbea22df6deaba31e146dddb456e4a5d5dd75.

Revert "fix: LaTex formula render issue (#3353)"

This reverts commit 3b2c84c4fee61b886c883c68801be3bc5a8584ad.

Revert "chore: minor ui improvement (#3352)"

This reverts commit 6dd387db2b5b9890f19d0c3505cf9cb770fd492f.

Revert "fix: failed to relaunch app to update (#3351)"

This reverts commit fcaf98a2fa4e674799602e8093914bcc04ced153.

Revert "chore: add back GPU information to system monitoring bar (#3350)"

This reverts commit 03455a91807c7af6c6325901997c6d7231d2cd0d.

Revert "fix: empty model page not shown when delete all threads and models (#3343)"

This reverts commit 9e29fcd69eb9085843896686806fd453a1285723.

Revert "feat: allow user configure remote model from my model (#3348)"

This reverts commit fdab8af057f80cf1ccaae0dc42c4e5161925f51e.

Revert "chore: ui fix button outline for configure cloud model (#3347)"

This reverts commit fe8ed1f26dc86ead92ffea4f36e2989caf7dad88.

Revert "feat: move icon create new thread into top panel (#3346)"

This reverts commit 46cb1b45b997181e2188f8dafb2fc0d0cc12ddcd.

Revert "chore(UI): update experience model dropdown (#3342)"

This reverts commit 8b44613015a907dc491113aeb99c963080424892.

Revert "Chore/simple bug template and correct a copy (#3344)"

This reverts commit 23cd5fd3979e7529811045da5c4912369bcc7532.

Revert "chore(ui): fix alignment loader starter screen (#3338)"

This reverts commit e9f5d2f837ce323b0851ea04cded913ab433388c.

Revert "Increase retry upload to R2 to 5 times (#3337)"

This reverts commit dcfb497934edc795955d971b6d391ee1e6309a03.

Revert "fix: broken jan build - add log trace (jan.log) (#3336)"

This reverts commit 77422c3a7ed240909942ac0d8c4b259af8d87a28.

Revert "chore: disable quick ask (#3334)"

This reverts commit 6e4b6b09ae009149f262d86d5b19bb8096267c19.

Revert "fix: update legacy path (#3328)"

This reverts commit 5eb112142c6431cfe0cdf11ce28810ca650a5427.

Revert "chore: add cortex version (#3318)"

This reverts commit 60587649c56a1f24272e763f25aa5b4042f7719a.

Revert "fix: broken app due to incorrect api path (#3316)"

This reverts commit 3de4eab2a0dfbf9f593d73b9dde6bca1d9df2279.

Revert "feat: modal waiting cortex (#3306)"

This reverts commit 1f5168d4af9080b867c19d334c398bf32e4f54b8.

Revert "fix: refresh should not create new thread (#3314)"

This reverts commit 624d07703c50ea332ed4eeac9dc3a26bc8190d08.

Revert "fix: avoid lose title threads (#3307)"

This reverts commit a4f5fda104c2d1e01ea72798f055e5b4e3cfd616.

Revert "feat: change data folder (#3309)"

This reverts commit b43242b9b24352c7f90995eccab753dede679616.

Revert "feat: embed cortex into jan as a js module (#3305)"

This reverts commit b348110fb73bd5f13c69f1b915168687dea776d0.

Revert "fix: migration item in setting detail omit buttons (#3298)"

This reverts commit 709204b2bc9d9ed08e2245cbb084482f5908ab3a.

Revert "fix: merge gpu arch and os tensorrt models (#3299)"

This reverts commit aa7dbdc9fa701debeee28d9c7eb4af6258685321.

Revert "chore: update cortex new version (#3300)"

This reverts commit 602097909d38b4874db8b9f19a729c65a0ac9619.

Revert "fix: engine logo on model dropdown (#3291)"

This reverts commit 8eb8611c28f6c4cdf1ab142a6e18c82bcc4c2073.

Revert "fix: icon setting can close and open right panel (#3295)"

This reverts commit be31e9315e2df5c483de3f46bd37740d277cfccd.

Revert "fix: error while importing local model is not shown (#3294)"

This reverts commit 26be941e8426462e1e3a28e5b9bf1f834f462f82.

Revert "fix: add lower case quantization support (#3293)"

This reverts commit 3135ccc27e894a4056f882cd25f0bf7e10e56f49.

Revert "fix: onnx can't be selected in download model modal (#3283)"

This reverts commit 2521e1db518e9e01493e89dcc98c181ccd2b48a2.

Revert "feat: add chunk count (#3290)"

This reverts commit bad481bf05aa38edcf553e1273f5d692a65c9225.

Revert "fix: RAM always show 0% (#3287)"

This reverts commit 2201e6c5f87538b953503937fe6b135fe1aa2d94.

Revert "fix: remote engine should not allow reinit (#3284)"

This reverts commit 98abff0da3467c090618233db12a25bfa4c1db69.

Revert "chore: update minor UI (#3281)"

This reverts commit 105a9aa1a1830648a32ae285f751b4078c8ac2b2.

Revert "chore: update z-index tooltip (#3280)"

This reverts commit 5a81865508c205ed8c54df209092553a0c40054f.

Revert "feat: add nvidia engine (#3279)"

This reverts commit 8372f30f0ee99606b123351e7bb62636c62c8b23.

Revert "fix: migration wrong directory (#3278)"

This reverts commit 7fb1354287677f577070ccb065ed3a5f9e5b9882.

Revert "fix: clearer app loading prompt (#3275)"

This reverts commit 44a6401000334b79b225ab6fd6afb79f9da4bd51.

Revert "fix: allow user to reinit engine from settings page (#3277)"

This reverts commit 57cf3c7b3d5bface785763d06813906ba6eab7c9.

Revert "feat: enable copy over instructions (#3266)"

This reverts commit 2074511067201f0addb9d274cc90d1e782f2bc1d.

Revert "chore: toast message on model import fail with reason (#3276)"

This reverts commit 3bebdfe67e1571c7414065a36d16eb5941115ee0.

Revert "fix: should not let second instance terminate cortex (#3274)"

This reverts commit d074a5a445b73ca195a49814a935300f9e895aaa.

Revert "chore: remove focus button (#3272)"

This reverts commit 07fa79e71a401becdbc0f474c27b860654a8bd62.

Revert "chore: update hub search result (#3273)"

This reverts commit 10b4a9087af709d147b34f6c3ee63d2d3b75c77a.

Revert "chore: temporary hidden import model (#3270)"

This reverts commit db5d8aba454fd4cc1e07253ca4805d4b1b3e7fb2.

Revert "fix: set cortex data folder path when starting jan (#3252)"

This reverts commit 91c77eda78ecd251d480e58b853fe7b261f6de50.

Revert "fix: remote model added manually does not shown in model drop down (#3261)"

This reverts commit 224ca3f7cc25b2577ab123829907964b78b78aa8.

Revert "feat: add more options for cortex popup (#3236)"

This reverts commit 5e06ed8a122aaed9d68fbd04ce42b65bf8987e58.

Revert "feat: manage cloud models from threads screen (#3223)"

This reverts commit 37a3c4f844419e66cfe3f2a9ff79ba688538241f.

Revert "chore: check the legacy incompatible message type (#3248)"

This reverts commit c10caf8d7f1f9cf68551e41de5d54cd4450cf44a.

Revert "chore: minor copy for grammar (#3235)"

This reverts commit f0f23078f31f58e01ba27787d6926f5c1eb2ff0b.

Revert "fix: add back normalize message function (#3234)"

This reverts commit 83579df3a40ff61eac25975da8295fceaec679dc.

Revert "chore: update conditional starter screen after cortex load (#3227)"

This reverts commit 4d3a97f1dca9e6c3ea746586e8607541f2d1c0b3.

Revert "fix: broken status parse due to empty category (#3233)"

This reverts commit 68714eeaf9212a6fdacd5c6a48d8691db9cc99eb.

Revert "feat: make scroll area type auto for make default visible scrollbar (#3220)"

This reverts commit 13428d60e7d3ea6a24c0df8871ea13e2dec0d5fd.

Revert "fix: update new api from cortex to support 0.5.0 (#3221)"

This reverts commit ec9b5bf682a8676e132a08075b6ae03cf9e23132.

Revert "feat: new starter screen (#3217)"

This reverts commit e8ee694abd33b34112d2c7d09f8c03370c2d22cc.

Revert "bump-cortex-0.5.0-1 (#3218)"

This reverts commit 5369da78f5b83b1c8761cb48820ccf3111728a90.

Revert "Deprecate Docker and K8s (#3219)"

This reverts commit 7611a05c44982d07465bec57658d5bf965f30ad5.

Revert "chore: set container max width for chat message and new hub screen (#3213)"

This reverts commit 007daa71616268b0e741e7a890b319401e49a81e.

Revert "feat: integrating cortex (#3001)"

This reverts commit 101268f6f36df96b62982a9eeb8581ebe103a909.
2024-08-15 10:44:47 +07:00

186 lines
7.2 KiB
Python

from __future__ import annotations
import json
import os
import sys
from pathlib import Path
from typing import Any, Callable
from .gguf_writer import GGUFWriter
class SpecialVocab:
    """Special-token metadata gathered from a model directory.

    Construction scans ``path`` for ``tokenizer.json``,
    ``tokenizer_config.json`` and ``config.json``, in that order; the first
    source that supplies a valid id for a token type wins.  When merges were
    requested but ``tokenizer.json`` did not provide any, ``merges.txt`` is
    tried as a fallback.  The collected values can then be handed to a
    writer object with :meth:`add_to_gguf`.
    """

    # BPE merges as 'left right' strings; empty when none were loaded.
    merges: list[str]
    # Token type -> value of the ``add_<type>_token`` flag from tokenizer_config.json.
    add_special_token: dict[str, bool]
    # Token type -> token id.
    special_token_ids: dict[str, int]
    # Chat template string from tokenizer_config.json, if present.
    chat_template: str | None
    # Exclusive upper bound for accepted token ids; None disables the check.
    n_vocab: int | None
    # Whether merges should be looked for at all.
    load_merges: bool
    # Token types (e.g. 'bos', 'eos') that are looked up in the JSON files.
    special_token_types: tuple[str, ...]

    def __init__(
        self, path: str | os.PathLike[str], load_merges: bool = False,
        special_token_types: tuple[str, ...] | None = None,
        n_vocab: int | None = None,
    ):
        """Load special-vocabulary information from the directory ``path``.

        Args:
            path: Directory that may contain ``tokenizer.json``,
                ``tokenizer_config.json``, ``config.json`` and ``merges.txt``.
            load_merges: When true, also try to load BPE merges.
            special_token_types: Token types to look up; defaults to
                ``('bos', 'eos', 'unk', 'sep', 'pad')``.
            n_vocab: If given, token ids must be strictly below this value.

        Raises:
            ValueError: If one of the files supplies a negative token id.
        """
        self.special_token_ids = {}
        self.add_special_token = {}
        self.n_vocab = n_vocab
        self.load_merges = load_merges
        self.merges = []
        self.chat_template = None
        if special_token_types is not None:
            self.special_token_types = special_token_types
        else:
            self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
        self._load(Path(path))

    def __repr__(self) -> str:
        return '<SpecialVocab with {} merges, special tokens {}, add special tokens {}>'.format(
            len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset",
        )

    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
        """Push everything that was loaded into the writer ``gw``.

        Handlers are resolved by name on ``gw``: ``add_<type>_token_id`` for
        ids and ``add_add_<type>_token`` for the add-flags.  A token type
        without a matching handler is skipped with a warning on stderr.

        Args:
            gw: Writer object receiving the values.
            quiet: Suppress the informational messages on stdout.  Warnings
                on stderr are printed regardless.
        """
        if self.merges:
            if not quiet:
                print(f'gguf: Adding {len(self.merges)} merge(s).')
            gw.add_token_merges(self.merges)
        elif self.load_merges:
            print(
                'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.',
                file = sys.stderr,
            )
        for typ, tokid in self.special_token_ids.items():
            id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
            if id_handler is None:
                print(
                    f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
                    file = sys.stderr,
                )
                continue
            if not quiet:
                print(f'gguf: Setting special token type {typ} to {tokid}')
            id_handler(tokid)
        for typ, value in self.add_special_token.items():
            add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
            if add_handler is None:
                print(
                    f'gguf: WARNING: No handler for add_{typ}_token with value {value} - skipping',
                    file = sys.stderr,
                )
                continue
            if not quiet:
                print(f'gguf: Setting add_{typ}_token to {value}')
            add_handler(value)
        if self.chat_template is not None:
            if not quiet:
                print(f'gguf: Setting chat_template to {self.chat_template}')
            gw.add_chat_template(self.chat_template)

    def _load(self, path: Path) -> None:
        """Run all loaders against ``path`` in priority order."""
        self._try_load_from_tokenizer_json(path)
        self._try_load_from_config_json(path)
        # merges.txt is only a fallback for merges missing from tokenizer.json.
        if self.load_merges and not self.merges:
            self._try_load_merges_txt(path)

    def _try_load_merges_txt(self, path: Path) -> bool:
        """Load merges from ``merges.txt``.

        A first line starting with ``#`` is treated as a header and skipped.
        Blank lines are ignored; lines that do not consist of exactly two
        whitespace-separated fields are reported on stderr and ignored.

        Returns:
            False if the file does not exist, True otherwise (``self.merges``
            is then replaced, possibly with an empty list).
        """
        merges_file = path / 'merges.txt'
        if not merges_file.is_file():
            return False
        merges: list[str] = []
        with open(merges_file, 'r', encoding = 'utf-8') as fp:
            for line_num, raw_line in enumerate(fp, start = 1):
                line = raw_line.strip()
                if line_num == 1 and line.startswith('#'):
                    continue
                if not line:
                    continue
                parts = line.split(None, 3)
                if len(parts) != 2:
                    print(
                        f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring',
                        file = sys.stderr,
                    )
                    continue
                merges.append(f'{parts[0]} {parts[1]}')
        self.merges = merges
        return True

    def _set_special_token(self, typ: str, tid: Any) -> None:
        """Record ``tid`` as the id for token type ``typ`` if it is usable.

        Non-integer values (including booleans) are ignored, an id already
        recorded for ``typ`` is never overwritten, and an id that is not
        below ``n_vocab`` is skipped with a warning on stderr.

        Raises:
            ValueError: If ``tid`` is a negative integer.
        """
        # bool is a subclass of int; a JSON true/false must not become id 1/0.
        if isinstance(tid, bool) or not isinstance(tid, int):
            return
        if tid < 0:
            raise ValueError(f'invalid value for special token type {typ}: {tid}')
        if self.n_vocab is None or tid < self.n_vocab:
            if typ in self.special_token_ids:
                return
            self.special_token_ids[typ] = tid
            return
        print(
            f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
            file = sys.stderr,
        )

    @staticmethod
    def _normalize_merges(merges: Any) -> list[str] | None:
        """Return ``merges`` as a list of 'left right' strings, or None if unusable.

        Two layouts are accepted: a list of strings (returned as-is; only the
        first element is inspected) and a list of two-element string lists,
        which is converted.  Pairs are rejected as a whole when any part is
        empty or contains whitespace, since the space-separated form could
        not represent them unambiguously.
        """
        if not isinstance(merges, list) or not merges:
            return None
        if isinstance(merges[0], str):
            return merges
        normalized: list[str] = []
        for pair in merges:
            if not isinstance(pair, list) or len(pair) != 2:
                return None
            left, right = pair
            if not (isinstance(left, str) and isinstance(right, str) and left and right):
                return None
            if any(ch.isspace() for ch in left) or any(ch.isspace() for ch in right):
                return None
            normalized.append(f'{left} {right}')
        return normalized

    def _try_load_from_tokenizer_json(self, path: Path) -> bool:
        """Load merges, chat template, add-flags and token ids from the tokenizer files.

        ``tokenizer.json`` supplies merges (when requested) and the
        ``added_tokens`` table; ``tokenizer_config.json`` supplies the chat
        template, the ``add_<type>_token`` flags and the token contents that
        are resolved to ids through ``added_tokens``.

        Returns:
            Always True, whether or not either file exists.
        """
        added_tokens: list[dict[str, Any]] = []
        tokenizer_file = path / 'tokenizer.json'
        if tokenizer_file.is_file():
            with open(tokenizer_file, encoding = 'utf-8') as f:
                tokenizer = json.load(f)
            if self.load_merges:
                model = tokenizer.get('model')
                merges = self._normalize_merges(model.get('merges') if isinstance(model, dict) else None)
                if merges is not None:
                    self.merges = merges
            raw_added_tokens = tokenizer.get('added_tokens')
            if isinstance(raw_added_tokens, list):
                # Drop anything that is not an object so the lookup below cannot fail.
                added_tokens = [atok for atok in raw_added_tokens if isinstance(atok, dict)]
        tokenizer_config_file = path / 'tokenizer_config.json'
        if not tokenizer_config_file.is_file():
            return True
        with open(tokenizer_config_file, encoding = 'utf-8') as f:
            tokenizer_config = json.load(f)
        chat_template = tokenizer_config.get('chat_template')
        if chat_template is None or isinstance(chat_template, str):
            self.chat_template = chat_template
        else:
            print(
                f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring',
                file = sys.stderr
            )
        for typ in self.special_token_types:
            add_entry = tokenizer_config.get(f'add_{typ}_token')
            if isinstance(add_entry, bool):
                self.add_special_token[typ] = add_entry
            if not added_tokens:
                # We will need this to get the content for the token, so if it's empty
                # may as well just give up.
                continue
            entry = tokenizer_config.get(f'{typ}_token')
            if isinstance(entry, str):
                tc_content = entry
            elif isinstance(entry, dict):
                entry_content = entry.get('content')
                if not isinstance(entry_content, str):
                    continue
                tc_content = entry_content
            else:
                continue
            # We only need the first match here.
            maybe_token_id = next(
                (atok.get('id') for atok in added_tokens if atok.get('content') == tc_content),
                None,
            )
            self._set_special_token(typ, maybe_token_id)
        return True

    def _try_load_from_config_json(self, path: Path) -> bool:
        """Load ``<type>_token_id`` values from ``config.json``.

        Returns:
            False if the file does not exist, True otherwise.
        """
        config_file = path / 'config.json'
        if not config_file.is_file():
            return False
        with open(config_file, encoding = 'utf-8') as f:
            config = json.load(f)
        for typ in self.special_token_types:
            self._set_special_token(typ, config.get(f'{typ}_token_id'))
        return True