Lord-Raven committed on
Commit
5825e6d
·
1 Parent(s): b8594c5

Messing with configuration.

Browse files
Files changed (2) hide show
  1. app.py +6 -5
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import spaces
 
2
  import gradio
3
  import json
4
  import onnxruntime
@@ -19,8 +20,8 @@ app.add_middleware(
19
  allow_headers=["*"],
20
  )
21
 
22
- # print(f"Is CUDA available: {torch.cuda.is_available()}")
23
- # print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
24
 
25
  # "xenova/mobilebert-uncased-mnli" "typeform/mobilebert-uncased-mnli" Fast but small--same as bundled in Statosphere
26
  # "xenova/deberta-v3-base-tasksource-nli" Not impressed
@@ -36,8 +37,8 @@ tokenizer_name = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"
36
  # model = ORTModelForSequenceClassification.from_pretrained(model_name, export=True, provider="CUDAExecutionProvider")
37
  # tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, model_max_length=512)
38
 
39
- # classifier = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0")
40
- classifier = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name)
41
 
42
  def classify(data_string, request: gradio.Request):
43
  if request:
@@ -49,7 +50,7 @@ def classify(data_string, request: gradio.Request):
49
  # else:
50
  return zero_shot_classification(data)
51
 
52
- @spaces.GPU
53
  def zero_shot_classification(data):
54
  results = classifier(data['sequence'], candidate_labels=data['candidate_labels'], hypothesis_template=data['hypothesis_template'], multi_label=data['multi_label'])
55
  response_string = json.dumps(results)
 
1
  import spaces
2
+ import torch
3
  import gradio
4
  import json
5
  import onnxruntime
 
20
  allow_headers=["*"],
21
  )
22
 
23
+ print(f"Is CUDA available: {torch.cuda.is_available()}")
24
+ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
25
 
26
  # "xenova/mobilebert-uncased-mnli" "typeform/mobilebert-uncased-mnli" Fast but small--same as bundled in Statosphere
27
  # "xenova/deberta-v3-base-tasksource-nli" Not impressed
 
37
  # model = ORTModelForSequenceClassification.from_pretrained(model_name, export=True, provider="CUDAExecutionProvider")
38
  # tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, model_max_length=512)
39
 
40
+ classifier = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name, device="cuda:0")
41
+ # classifier = pipeline(task="zero-shot-classification", model=model_name, tokenizer=tokenizer_name)
42
 
43
  def classify(data_string, request: gradio.Request):
44
  if request:
 
50
  # else:
51
  return zero_shot_classification(data)
52
 
53
+ @spaces.GPU(duration=10)
54
  def zero_shot_classification(data):
55
  results = classifier(data['sequence'], candidate_labels=data['candidate_labels'], hypothesis_template=data['hypothesis_template'], multi_label=data['multi_label'])
56
  response_string = json.dumps(results)
requirements.txt CHANGED
@@ -3,5 +3,5 @@ fastapi==0.88.0
3
  huggingface_hub==0.26.0
4
  json5==0.9.25
5
  numpy
6
- optimum[onnxruntime]==1.24.0
7
  transformers==4.36
 
3
  huggingface_hub==0.26.0
4
  json5==0.9.25
5
  numpy
6
+ optimum[onnxruntime-gpu]==1.24.0
7
  transformers==4.36