import spaces
import torch
import gradio
import json
# Imports used only by the commented-out ONNX path below:
# import onnxruntime
# from optimum.onnxruntime import ORTModelForSequenceClassification
# from transformers import AutoTokenizer
from transformers import pipeline
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
# CORS config
app = FastAPI()

# Origins allowed to reach this Space; classify() re-checks these per request below.
allowed_origins = [
    "https://statosphere-3704059fdd7e.c5v4v4jx6pq5.win",
    "https://crunchatize-77a78ffcc6a6.c5v4v4jx6pq5.win",
    "https://crunchatize-2-2b4f5b1479a6.c5v4v4jx6pq5.win",
    "https://tamabotchi-2dba63df3bf1.c5v4v4jx6pq5.win",
    "https://lord-raven.github.io",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# "xenova/mobilebert-uncased-mnli" "typeform/mobilebert-uncased-mnli" Fast but small--same as bundled in Statosphere
# "xenova/deberta-v3-base-tasksource-nli" Not impressed
# "Xenova/bart-large-mnli" A bit slow
# "Xenova/distilbert-base-uncased-mnli" "typeform/distilbert-base-uncased-mnli" Bad answers
# "Xenova/deBERTa-v3-base-mnli" "MoritzLaurer/DeBERTa-v3-base-mnli" Still a bit slow and not great answers
# "xenova/nli-deberta-v3-small" "cross-encoder/nli-deberta-v3-small" Was using this for a good while and it was...okay
model_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
# file_name = "onnx/model.onnx"
tokenizer_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
# model = ORTModelForSequenceClassification.from_pretrained(model_name, export=True, provider="CUDAExecutionProvider")
# tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, model_max_length=512)
classifier = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0")
# classifier = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name)
def classify(data_string, request: gradio.Request):
    if request:
        # Accept the CORS origins plus direct calls from this Space's own frontend.
        if request.headers.get("origin") not in allowed_origins + ["https://ravenok-statosphere-backend.hf.space"]:
            return "{}"
    data = json.loads(data_string)
    # if 'task' in data and data['task'] == 'few_shot_classification':
    #     return few_shot_classification(data)
    # else:
    return zero_shot_classification(data)
@spaces.GPU(duration=10)
def zero_shot_classification(data):
    results = classifier(data['sequence'], candidate_labels=data['candidate_labels'], hypothesis_template=data['hypothesis_template'], multi_label=data['multi_label'])
    response_string = json.dumps(results)
    return response_string
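
# Illustrative request payload for classify (values are hypothetical; the keys match
# those read above):
# {
#     "sequence": "The knight drew his sword and charged.",
#     "candidate_labels": ["combat", "dialogue", "exploration"],
#     "hypothesis_template": "This text is about {}.",
#     "multi_label": false
# }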
def create_sequences(data):
    # return ['###Given:\n' + data['sequence'] + '\n###End Given\n###Hypothesis:\n' + data['hypothesis_template'].format(label) + "\n###End Hypothesis" for label in data['candidate_labels']]
    return [data['sequence'] + '\n' + data['hypothesis_template'].format(label) for label in data['candidate_labels']]
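
# For example (hypothetical values): {'sequence': 'A duel at dawn.', 'hypothesis_template': 'This is about {}.',
# 'candidate_labels': ['honor', 'farming']} yields
# ['A duel at dawn.\nThis is about honor.', 'A duel at dawn.\nThis is about farming.'].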
# def few_shot_classification(data):
#     sequences = create_sequences(data)
#     print(sequences)
#     # results = onnx_few_shot_model(sequences)
#     probs = onnx_few_shot_model.predict_proba(sequences)
#     scores = [true[0] for true in probs]
#     composite = list(zip(scores, data['candidate_labels']))
#     composite = sorted(composite, key=lambda x: x[0], reverse=True)
#     scores, labels = zip(*composite)
#     response_dict = {'scores': scores, 'labels': labels}
#     print(response_dict)
#     response_string = json.dumps(response_dict)
#     return response_string
gradio_interface = gradio.Interface(
    fn=classify,
    inputs=gradio.Textbox(label="JSON Input"),
    outputs=gradio.Textbox(),
)
gradio_interface.launch()
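
# A remote client might call this endpoint roughly like so (a sketch: the Space ID is
# inferred from the origin list above, and "/predict" is the default Interface route):
# from gradio_client import Client
# client = Client("Ravenok/statosphere-backend")
# result = client.predict('{"sequence": "...", "candidate_labels": ["..."], "hypothesis_template": "This text is about {}.", "multi_label": false}', api_name="/predict")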