import os

import streamlit as st
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Log in to the Hugging Face Hub with a token read from the HF_LOGIN
# environment variable (needed if the model is gated or private).
login(os.getenv("HF_LOGIN"))

# Generate the reply in chunks of this many tokens so partial output can be
# shown on the page as it is produced.
token_step_size = 20

model_id = "utter-project/EuroLLM-1.7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
# Set a pad token id so generate() does not warn about it being unset.
model.generation_config.pad_token_id = tokenizer.pad_token_id

inner = st.text_area("Enter some input!")
# Wrap the user input in EuroLLM's ChatML-style chat format.
text = "<|im_start|>user\n" + inner + "<|im_end|>\n<|im_start|>assistant\n"

if inner:
    inputs = tokenizer(text, return_tensors="pt")
    # First chunk: generate token_step_size tokens and display them right away.
    outputs = model.generate(**inputs, max_new_tokens=token_step_size)
    st.write(tokenizer.decode(outputs[0][-token_step_size:], skip_special_tokens=False))

    # Keep extending the sequence one chunk at a time until the end-of-turn
    # token <|im_end|> appears in the newest chunk. Its id is looked up from
    # the tokenizer instead of hardcoding it (the original used the magic
    # number 4).
    im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
    while not torch.any(outputs[0][-token_step_size:] == im_end_id):
        # Feed the full sequence back in and generate another chunk.
        outputs = model.generate(
            input_ids=outputs,
            attention_mask=torch.ones_like(outputs),
            max_new_tokens=token_step_size,
        )
        st.write(tokenizer.decode(outputs[0][-token_step_size:], skip_special_tokens=False))
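
# A sketch of a more idiomatic alternative to the chunked loop above (not
# what this app currently does): transformers' TextIteratorStreamer yields
# text as it is generated, and st.write_stream (Streamlit >= 1.31) renders
# an iterator of strings incrementally. Assumes the same `model`,
# `tokenizer`, and `inputs` as above; max_new_tokens=512 is an arbitrary cap.
#
#   from threading import Thread
#   from transformers import TextIteratorStreamer
#
#   streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
#   Thread(target=model.generate,
#          kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512)).start()
#   st.write_stream(streamer)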