various optimizations

This commit is contained in:
2024-07-20 21:47:18 +02:00
parent 2f35689355
commit c38ac65d5b
4 changed files with 151 additions and 101 deletions

View File

@@ -30,13 +30,13 @@ def smart_tokenizer_and_embedding_resize(
def get_tokenizer(model, cache_dir, model_args: ModelArguments):
print(f'Tokenizer: {model_args.tokenizer if model_args.tokenizer is not None else model_args.model_name_or_path}')
tokenizer_path = model_args.tokenizer if model_args.tokenizer is not None else model_args.model_name_or_path
print(f'Tokenizer: {tokenizer_path}')
tokenizer = transformers.AutoTokenizer.from_pretrained(
model_args.tokenizer if model_args.tokenizer is not None else model_args.model_name_or_path,
tokenizer_path,
cache_dir=cache_dir,
padding_side="right",
use_fast=False,
eos_token="[EOS]",
tokenizer_type='llama' if 'llama' in model_args.model_name_or_path else None,
trust_remote_code=model_args.trust_remote_code
)