import os
from threading import Thread
from typing import Iterator

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

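# Generation limits. MAX_INPUT_TOKEN_LENGTH can be overridden through the
# environment to tune the prompt budget without a code change.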
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

DESCRIPTION = """\
# Dany-1.0

Dany is an advanced AI. It specializes in programming languages.
"""

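# Load the model once at startup: bfloat16 on the GPU when CUDA is
# available, full precision on the CPU otherwise (slower, but functional).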
model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"
if not torch.cuda.is_available():
    print("CUDA is not available. Running on CPU 🥶. The model will be slower.")
    model = AutoModelForCausalLM.from_pretrained(model_id)
else:
    print("CUDA is available. Running on GPU 🚀.")
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.bfloat16, device_map="auto"
    )
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.use_default_system_prompt = False

DANY_SYSTEM_PROMPT = """
You are Dany, an advanced artificial intelligence created by Giovanni Lucas Correia, an ambitious 21-year-old programmer.
You specialize in programming, particularly Python and Delphi.
You can speak three languages: French, Portuguese, and English.
"""


def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str = DANY_SYSTEM_PROMPT,
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
    temperature: float = 1.0,
    top_k: int = 50,
    repetition_penalty: float = 1.0,
) -> Iterator[str]:
    """Stream the model's reply to `message`, given the prior chat history."""
    # Rebuild the conversation in the role/content format expected by
    # `apply_chat_template`.
    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    for user, assistant in chat_history:
        conversation.extend(
            [{"role": "user", "content": user}, {"role": "assistant", "content": assistant}]
        )
    conversation.append({"role": "user", "content": message})

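    # Tokenize with the model's chat template, keeping only the most recent
    # tokens if the prompt exceeds the configured context budget.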
    input_ids = tokenizer.apply_chat_template(
        conversation, return_tensors="pt", add_generation_prompt=True
    )
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(
            f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens."
        )
    input_ids = input_ids.to(model.device)

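    # Generation runs in a background thread; TextIteratorStreamer hands the
    # decoded tokens back to this generator as they are produced.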
    streamer = TextIteratorStreamer(
        tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
    )
    # Sampling must be enabled for temperature/top_k to take effect.
    generate_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "do_sample": True,
        "temperature": temperature,
        "top_k": top_k,
        "num_beams": 1,
        "repetition_penalty": repetition_penalty,
        "eos_token_id": tokenizer.eos_token_id,
    }
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

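    # Accumulate the streamed chunks and yield the running text, stripping
    # the model's <|EOT|> end-of-turn marker.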
    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs).replace("<|EOT|>", "")


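# Chat UI: Gradio calls `generate(message, chat_history)` and streams each
# yielded string straight into the chat window.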
chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[],
    stop_btn=None,
    examples=[
        ["Who created you and what are your skills?"],
        ["Can you write a quicksort algorithm in Python?"],
        ["Write some Python code."],
    ],
)


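# Wrap the chat in a Blocks layout so the Markdown header renders above it;
# the CSS rule hides Gradio's default footer.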
with gr.Blocks(css="footer {visibility: hidden;}") as demo:
    gr.Markdown(DESCRIPTION)
    chat_interface.render()


if __name__ == "__main__":
    demo.launch(share=True, show_api=False)