Spaces:

Zhdantim
/

simple_paper_classifier

Sleeping

App Files Files Community

timofeyzhdanovich committed on 24 days ago

Commit

bba0e26

1 Parent(s): ce274fb

init commit

Browse files

Files changed (3) hide show

app.py +56 -0
classes.tsv +39 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import pandas as pd
+from torch.nn import Softmax
+from torch import sort
+@st.cache_resource  # caching
+def load_model():
+    """Load the 'zhdantim/mydeberta-v3-small' sequence-classification model and put it in eval mode."""
+    classifier = AutoModelForSequenceClassification.from_pretrained('zhdantim/mydeberta-v3-small')
+    # eval() returns the module itself, so calling it separately is equivalent to chaining it.
+    classifier.eval()
+    return classifier
+@st.cache_resource  # caching
+def load_id2classes():
+    """Read classes.tsv and return the mapping from its index column to the 'classes' column."""
+    table = pd.read_csv('classes.tsv', sep='\t', index_col=0)
+    columns = table.to_dict()
+    return columns['classes']
+@st.cache_resource
+def load_tokenizer():
+    """Load the tokenizer published as 'microsoft/deberta-v3-small'."""
+    checkpoint = 'microsoft/deberta-v3-small'
+    return AutoTokenizer.from_pretrained(checkpoint)
+# Module-level objects used by get_top_classes; each loader is wrapped in st.cache_resource.
+model = load_model()
+id2classes = load_id2classes()
+tokenizer = load_tokenizer()
+def get_top_classes(text, threshold=0.95):
+    """Return the most probable classes whose cumulative probability reaches ``threshold``.
+
+    Args:
+        text: Input string passed to the module-level ``tokenizer``.
+        threshold: Cumulative probability mass to cover; defaults to 0.95.
+
+    Returns:
+        A pair ``(names, probs)``: class names looked up in ``id2classes``, ordered by
+        descending probability, and the matching slice of the sorted probability tensor.
+    """
+    # Function-scope import: only Softmax and sort are imported from torch at module level.
+    from torch import no_grad
+    tokenized_text = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
+    # Inference only, so skip building the autograd graph.
+    with no_grad():
+        logits = model(**tokenized_text).logits
+    # Explicit dim: Softmax() without it relies on a deprecated implicit choice.
+    probs = Softmax(dim=-1)(logits)
+    probs_sorted, indices = sort(probs, descending=True)
+    # Prefix sums are non-decreasing, so the number of prefixes below the
+    # threshold plus one is the smallest k that reaches it.
+    cumulative = probs_sorted[0].cumsum(dim=0)
+    k = int((cumulative < threshold).sum().item()) + 1
+    # Guard against rounding (or a threshold above 1) leaving every prefix below it.
+    k = min(k, cumulative.numel())
+    return [id2classes[idx.item()] for idx in indices[0, :k]], probs_sorted[0, :k]
+st.title("Простой классификатор статей")
+# The default text acts as a placeholder; it is compared against below to detect untouched fields.
+title = st.text_input(label="Введите название статьи (обязательно)", value="Type Here ...")
+abstract = st.text_input(label="Введите abstract", value="Type Here ...")
+if st.button('Submit'):
+    # Use the text as typed: str.title() would re-case every word
+    # (e.g. "BERT" -> "Bert") before it reaches the model.
+    title_text = title.strip()
+    abstract_text = abstract.strip()
+    # An empty field and an untouched placeholder both count as "not provided".
+    if title_text and title_text != 'Type Here ...':
+        if abstract_text and abstract_text != 'Type Here ...':
+            text = title_text + '\n' + abstract_text
+        else:
+            text = title_text
+        top_classes, probs = get_top_classes(text)
+        for p, cls in zip(probs, top_classes):
+            # float() unwraps the 0-d tensor so the message shows a number, not "tensor(...)".
+            st.success(f'Статья относится к {cls} с вероятностью {float(p):.3f}')
+    else:
+        st.error('Введите название статьи')

classes.tsv ADDED Viewed

	@@ -0,0 +1,39 @@

+	classes
+0	cmp-lg
+1	math
+2	comp-gas
+3	acc-phys
+4	patt-sol
+5	funct-an
+6	hep-th
+7	ao-sci
+8	mtrl-th
+9	adap-org
+10	bayes-an
+11	alg-geom
+12	nlin
+13	chao-dyn
+14	chem-ph
+15	cond-mat
+16	math-ph
+17	eess
+18	hep-ph
+19	plasm-ph
+20	dg-ga
+21	stat
+22	econ
+23	nucl-th
+24	q-alg
+25	atom-ph
+26	hep-ex
+27	q-bio
+28	cs
+29	hep-lat
+30	quant-ph
+31	astro-ph
+32	nucl-ex
+33	q-fin
+34	solv-int
+35	physics
+36	gr-qc
+37	supr-con

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+pandas
+transformers
+torch