File size: 1,384 Bytes
604d33c
 
 
 
 
 
 
 
435e3d0
043c6d6
604d33c
165fdbc
4a3b112
604d33c
435e3d0
604d33c
 
 
 
 
 
435e3d0
 
604d33c
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import gradio as gr
import gradio.components as grc
# from wmdetection.models import get_watermarks_detection_model
# from wmdetection.pipelines.predictor import WatermarksPredictor
import os, glob
import spaces
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Hugging Face model id of the ReMoDetect reward model (DeBERTa backbone).
model_name = 'hyunseoki/ReMoDetect-deberta'

# Decision threshold on the raw reward score; predict() centres its sigmoid
# here, so a raw score of 3.0 maps to ~50 % AI likelihood.
# NOTE(review): name is a typo for THRESHOLD — kept as-is because predict()
# references it by this spelling.
THESHOLD=3.0
# Load model + tokenizer once at module import; predict() moves the model to
# the right device per call.
predictor = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
predictor.eval()

@spaces.GPU
def predict(text):
    """Score *text* with the reward model.

    Returns a 2-tuple of display strings:
      - AI likelihood as a percentage (sigmoid of the raw score, centred
        on THESHOLD),
      - the raw reward-model score rounded to 2 decimals.
    """
    # Resolve the device per call: @spaces.GPU provisions the GPU on demand,
    # so CUDA may only be available inside this function.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    predictor.to(device)
    tokenized = tokenizer(text, return_tensors='pt', truncation=True, max_length=512).to(device)
    with torch.no_grad():
        # .detach() was redundant under no_grad(); .item() assumes a
        # single-logit regression-style head — TODO confirm num_labels == 1.
        result = predictor(**tokenized).logits[0].cpu().item()
    # Squash the raw score into [0, 1], centred at the decision threshold;
    # the factor 2 sharpens the transition around THESHOLD.
    AI_score = round(torch.sigmoid(torch.tensor(result-THESHOLD)*2).item(),2)
    return f'{AI_score*100} %', f'{round(result,2)}'

# Build and launch the Gradio demo: one text input, two text outputs
# (sigmoid-squashed AI likelihood and the raw reward-model score).
iface = gr.Interface(
    fn=predict,
    title="ReMoDetect: Reward Model for LLM Generated Text Detection",
    # Fixed grammar/typo in the user-facing description ("so that can
    # classify ... human writen text").
    description="A continuously finetuned reward model that classifies LLM-generated text from human-written text.",
    inputs=grc.Textbox(label='INPUT', placeholder="Type here..."),
    outputs=[grc.Textbox(label="AI likelihood"), grc.Textbox(label="Raw score")],
)
# NOTE(review): share=True is ignored on Hugging Face Spaces; it only matters
# when running locally.
iface.launch(share=True)