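"""Gradio demo: a French conversational assistant built on the Nac31/Sacha-Mistral-0 model.

Expects a Hugging Face access token in the HF_TOKEN environment variable (for example via a
.env file read by python-dotenv) so the script can log in to the Hub before loading the model.
"""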
import os

import gradio as gr
import torch
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Read HF_TOKEN from the environment (or a local .env file) and log in to the
# Hugging Face Hub; skip the login when no token is configured.
load_dotenv()
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)

model_path = "Nac31/Sacha-Mistral-0"

# Choose a compute dtype: bfloat16 on Ampere-or-newer GPUs (compute capability >= 8),
# float16 on older GPUs, and float32 when no CUDA device is available, so the script
# no longer crashes on CPU-only machines.
if torch.cuda.is_available():
    dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
else:
    dtype = torch.float32

# Load the tokenizer and model; device_map="auto" places the weights on the available
# GPU(s) and falls back to CPU when necessary.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=dtype,
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
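
# Optional smoke test, not part of the original script (the prompt string is only an example):
# uncomment to confirm that generation works before launching the web UI.
# print(pipe("Bonjour !", max_new_tokens=20, do_sample=True)[0]["generated_text"])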


def generate_response(message, temperature=0.7, max_new_tokens=500):
    """Generate a reply to `message`; on failure, return the error text instead of raising."""
    try:
        response = pipe(
            message,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            do_sample=True,
        )
        return response[0]["generated_text"]
    except Exception as e:
        return f"Une erreur s'est produite : {str(e)}"


demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Votre message", placeholder="Entrez votre message ici..."),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Température"),
        gr.Slider(minimum=10, maximum=2000, value=500, step=10, label="Nombre de tokens"),
    ],
    outputs=gr.Textbox(label="Réponse"),
    title="Chat avec Sacha-Mistral",
    description="Un assistant conversationnel en français basé sur le modèle Sacha-Mistral",
)

if __name__ == "__main__":
    # share=True also exposes the app through a temporary public Gradio link.
    demo.launch(share=True)