import gradio as gr
from paddlenlp.transformers import AutoTokenizer, AutoModelForCausalLM

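# Load the tokenizer and model once at startup. dtype="float32" keeps the
# 0.5B model runnable on CPU-only hardware such as a free Space.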
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct", dtype="float32")


def inference(input_text):
    # Tokenize the prompt into Paddle tensors ("pd").
    input_features = tokenizer(input_text, return_tensors="pd")

    # PaddleNLP's generate() returns a (ids, scores) tuple, so outputs[0]
    # holds the generated token ids.
    outputs = model.generate(**input_features, max_new_tokens=128)
    output_text = tokenizer.batch_decode(outputs[0], skip_special_tokens=True)[0]

    return output_text
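

# A hedged alternative to inference(), not part of the original demo:
# instruct-tuned models such as Qwen2-0.5B-Instruct generally expect their
# chat template to be applied around the prompt. This sketch assumes the
# tokenizer exposes apply_chat_template(...) as used in PaddleNLP's LLM
# examples; the name inference_chat is ours.
def inference_chat(input_text):
    input_features = tokenizer.apply_chat_template(input_text, return_tensors="pd")
    outputs = model.generate(**input_features, max_new_tokens=128)
    return tokenizer.batch_decode(outputs[0], skip_special_tokens=True)[0]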


title = 'PaddlePaddle Meets LLM'
description = '''
- The underlying execution framework is based on [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) and [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP).
- PaddleNLP supports a wide range of open-source LLMs. Check out the [full model list](https://github.com/PaddlePaddle/PaddleNLP?tab=readme-ov-file#%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81).
- We chose Qwen2-0.5B-Instruct as the model for this use case due to limited computational resources.
- [ERNIE 4.5](https://yiyan.baidu.com/) was trained with PaddlePaddle; [give it a try](https://huggingface.co/spaces/PaddlePaddle/ernie_demo)!
'''

examples = ['请自我介绍一下。']  # "Please introduce yourself."

demo = gr.Interface(
    inference,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)


if __name__ == "__main__":
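    # Serve the demo locally; Gradio binds to http://127.0.0.1:7860 by default.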
    demo.launch()