kz209 committed
Commit d092d11
1 Parent(s): 80a8eaa
README.md CHANGED
@@ -78,4 +78,4 @@ For bug fixes or questions, either open an issue or create a branch prefixed wit
 
 ## Accknowledgement
 
-Thanks for the GPU grant from Huggingface.
+Thanks for the GPU grant from Huggingface.
app.py CHANGED
@@ -1,9 +1,10 @@
 import gradio as gr
 
 from pages.arena import create_arena
-from pages.summarization_playground import create_summarization_interface
-from pages.leaderboard import create_leaderboard
 from pages.batch_evaluation import create_batch_evaluation_interface
+from pages.leaderboard import create_leaderboard
+from pages.summarization_playground import create_summarization_interface
+
 
 def welcome_message():
     return """## Clinical Dialogue Summarization
pages/arena.py CHANGED
@@ -1,11 +1,12 @@
+import json
 import random
+
 import gradio as gr
-import json
 
+from pages.summarization_playground import custom_css, get_model_batch_generation
 from utils.data import dataset
 from utils.multiple_stream import stream_data
-from pages.summarization_playground import get_model_batch_generation
-from pages.summarization_playground import custom_css
+
 
 def random_data_selection():
     datapoint = random.choice(dataset)
pages/batch_evaluation.py CHANGED
@@ -1,17 +1,14 @@
-from dotenv import load_dotenv
-import gradio as gr
-
-import json
 import html
+import json
 import logging
 
+import gradio as gr
 import numpy as np
+from dotenv import load_dotenv
 
-from utils.model import Model
+from pages.summarization_playground import custom_css, generate_answer
 from utils.metric import metric_rouge_score
-
-from pages.summarization_playground import generate_answer
-from pages.summarization_playground import custom_css
+from utils.model import Model
 
 load_dotenv()
 
pages/leaderboard.py CHANGED
@@ -1,7 +1,9 @@
 import html
 import json
-import pandas as pd
+
 import gradio as gr
+import pandas as pd
+
 
 # Function to create HTML tooltips
 def create_html_with_tooltip(id, base_url):
pages/summarization_playground.py CHANGED
@@ -1,14 +1,13 @@
-from dotenv import load_dotenv
-import gradio as gr
+import gc
+import logging
 import random
 
-from utils.model import Model
-from utils.data import dataset
-
-import gc
+import gradio as gr
 import torch
+from dotenv import load_dotenv
 
-import logging
+from utils.data import dataset
+from utils.model import Model
 
 load_dotenv()
 
prompt/prompt.ipynb CHANGED
@@ -15,7 +15,7 @@
 " \"author\": \"Shunxi Wu\",\n",
 " \"metric\": {\n",
 " \"Rouge\": 0.14,\n",
-" \"winning_number\": 10\n",
+" \"winning_number\": 11\n",
 " },\n",
 " \"url\": \"https://docs.google.com/spreadsheets/d/1ui9ccRkzeMWAiJiRgr2ClpYTAK4uFhX44aXi0WDJY8Q/edit?gid=1699794338#gid=1699794338&range=D2\"\n",
 " },\n",
requirements.txt CHANGED
@@ -9,4 +9,5 @@ torchvision
 torchaudio
 datasets
 rouge-score
-markdown
+markdown
+vllm
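
requirements.txt gains vllm to back the new inference path in utils/model.py below. A quick smoke test for the new dependency might look like the following, assuming a CUDA GPU is available; the model name and prompt are placeholders, not taken from this repo:

```python
# Sketch: confirm the vllm dependency imports and generates end to end.
# "facebook/opt-125m" is an arbitrary small model picked for the smoke test.
from vllm import LLM, SamplingParams

llm = LLM(model="facebook/opt-125m", dtype="bfloat16")
params = SamplingParams(temperature=0.8, max_tokens=32)
outputs = llm.generate(["Summarize: the patient reports mild chest pain."], params)
print(outputs[0].outputs[0].text)
```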
utils/model.py CHANGED
@@ -1,12 +1,12 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
-import torch
+import logging
+import os
 
+import torch
 from huggingface_hub import login
-import os
+from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
+from vllm import LLM, SamplingParams
 
-import logging
-
-login(token = os.getenv('HF_TOKEN'))
+login(token=os.getenv('HF_TOKEN'))
 
 class Model(torch.nn.Module):
     number_of_models = 0
@@ -23,89 +23,106 @@ class Model(torch.nn.Module):
 
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         self.name = model_name
+        self.use_vllm = model_name != "google-t5/t5-large"
 
-        logging.info(f'start loading model {self.name}')
+        logging.info(f'Start loading model {self.name}')
 
-        if model_name == "google-t5/t5-large":
-            # For T5 or any other Seq2Seq model
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(
-                model_name, torch_dtype=torch.bfloat16, device_map="auto"
+        if self.use_vllm:
+            # Load the model with vLLM
+            self.llm = LLM(
+                model=model_name,
+                dtype="bfloat16",
+                tokenizer=model_name,
+                trust_remote_code=True
             )
         else:
-            # For GPT-like models or other causal language models
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name, torch_dtype=torch.bfloat16, device_map="auto"
+            # Load the original transformers model
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.bfloat16,
+                device_map="auto"
             )
+            self.model.eval()
 
         logging.info(f'Loaded model {self.name}')
-
-        self.model.eval()
         self.update()
 
     @classmethod
     def update(cls):
         cls.number_of_models += 1
 
-    def return_mode_name(self):
-        return self.name
-
-    def return_tokenizer(self):
-        return self.tokenizer
-
-    def return_model(self):
-        return self.model
-
-    def streaming(self, content_list, temp=0.001, max_length=500, do_sample=True):
-        # Convert list of texts to input IDs
-        input_ids = self.tokenizer(content_list, return_tensors="pt", padding=True, truncation=True).input_ids.to(self.model.device)
-
-        # Set up the initial generation parameters
-        gen_kwargs = {
-            "input_ids": input_ids,
-            "do_sample": do_sample,
-            "temperature": temp,
-            "eos_token_id": self.tokenizer.eos_token_id,
-            "max_new_tokens": 1,  # Generate one token at a time
-            "return_dict_in_generate": True,
-            "output_scores": True
-        }
-
-        # Generate and yield tokens one by one
-        generated_tokens = 0
-        batch_size = input_ids.shape[0]
-        active_sequences = torch.arange(batch_size)
-
-        while generated_tokens < max_length and len(active_sequences) > 0:
-            with torch.no_grad():
-                output = self.model.generate(**gen_kwargs)
-
-            next_tokens = output.sequences[:, -1].unsqueeze(-1)
-
-            # Yield the newly generated tokens for each sequence in the batch
-            for i, token in zip(active_sequences, next_tokens):
-                yield i, self.tokenizer.decode(token[0], skip_special_tokens=True)
-
-            # Update input_ids for the next iteration
-            gen_kwargs["input_ids"] = torch.cat([gen_kwargs["input_ids"], next_tokens], dim=-1)
-            generated_tokens += 1
-
-            # Check for completed sequences
-            completed = (next_tokens.squeeze(-1) == self.tokenizer.eos_token_id).nonzero().squeeze(-1)
-            active_sequences = torch.tensor([i for i in active_sequences if i not in completed])
-            if len(active_sequences) > 0:
-                gen_kwargs["input_ids"] = gen_kwargs["input_ids"][active_sequences]
-
-
-    def gen(self, content_list, temp=0.001, max_length=500, do_sample=True):
-        # Convert list of texts to input IDs
-        input_ids = self.tokenizer(content_list, return_tensors="pt", padding=True, truncation=True).input_ids.to(self.model.device)
-
-        # Non-streaming generation (unchanged)
-        outputs = self.model.generate(
-            input_ids,
-            max_new_tokens=max_length,
-            do_sample=do_sample,
-            temperature=temp,
-            eos_token_id=self.tokenizer.eos_token_id,
-        )
-        return self.tokenizer.batch_decode(outputs[:, input_ids.shape[1]:], skip_special_tokens=True)
+    def gen(self, content_list, temp=0.001, max_length=500, do_sample=True):
+        if self.use_vllm:
+            sampling_params = SamplingParams(
+                temperature=temp,
+                max_tokens=max_length,
+                top_p=0.95 if do_sample else 1.0,
+                stop_token_ids=[self.tokenizer.eos_token_id]
+            )
+            outputs = self.llm.generate(content_list, sampling_params)
+            return [output.outputs[0].text for output in outputs]
+        else:
+            input_ids = self.tokenizer(content_list, return_tensors="pt", padding=True, truncation=True).input_ids.to(self.model.device)
+            outputs = self.model.generate(
+                input_ids,
+                max_new_tokens=max_length,
+                do_sample=do_sample,
+                temperature=temp,
+                eos_token_id=self.tokenizer.eos_token_id,
+            )
+            return self.tokenizer.batch_decode(outputs[:, input_ids.shape[1]:], skip_special_tokens=True)
+
+    def streaming(self, content_list, temp=0.001, max_length=500, do_sample=True):
+        if self.use_vllm:
+            sampling_params = SamplingParams(
+                temperature=temp,
+                max_tokens=max_length,
+                top_p=0.95 if do_sample else 1.0,
+                stop_token_ids=[self.tokenizer.eos_token_id]
+            )
+            outputs = self.llm.generate(content_list, sampling_params, stream=True)
+
+            prev_token_ids = [[] for _ in content_list]
+
+            for output in outputs:
+                for i, request_output in enumerate(output.outputs):
+                    current_token_ids = request_output.token_ids
+                    new_token_ids = current_token_ids[len(prev_token_ids[i]):]
+                    prev_token_ids[i] = current_token_ids.copy()
+
+                    for token_id in new_token_ids:
+                        token_text = self.tokenizer.decode(token_id, skip_special_tokens=True)
+                        yield i, token_text
+        else:
+            input_ids = self.tokenizer(content_list, return_tensors="pt", padding=True, truncation=True).input_ids.to(self.model.device)
+
+            gen_kwargs = {
+                "input_ids": input_ids,
+                "do_sample": do_sample,
+                "temperature": temp,
+                "eos_token_id": self.tokenizer.eos_token_id,
+                "max_new_tokens": 1,
+                "return_dict_in_generate": True,
+                "output_scores": True
+            }
+
+            generated_tokens = 0
+            batch_size = input_ids.shape[0]
+            active_sequences = torch.arange(batch_size)
+
+            while generated_tokens < max_length and len(active_sequences) > 0:
+                with torch.no_grad():
+                    output = self.model.generate(**gen_kwargs)
+
+                next_tokens = output.sequences[:, -1].unsqueeze(-1)
+
+                for i, token in zip(active_sequences, next_tokens):
+                    yield i.item(), self.tokenizer.decode(token[0], skip_special_tokens=True)
+
+                gen_kwargs["input_ids"] = torch.cat([gen_kwargs["input_ids"], next_tokens], dim=-1)
+                generated_tokens += 1
+
+                completed = (next_tokens.squeeze(-1) == self.tokenizer.eos_token_id).nonzero().squeeze(-1)
+                active_sequences = torch.tensor([i for i in active_sequences if i not in completed])
+                if len(active_sequences) > 0:
+                    gen_kwargs["input_ids"] = gen_kwargs["input_ids"][active_sequences]
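
One caveat on the vLLM branch of streaming(): the synchronous vllm.LLM.generate() API does not take a stream=True keyword, so that path would raise a TypeError at call time; token-level streaming in vLLM normally goes through the async engine instead. A rough sketch of that alternative for a single prompt, assuming a recent vLLM (the engine arguments, model name, and request_id are illustrative, not from this commit):

```python
# Sketch: token streaming via vLLM's async engine rather than stream=True.
import asyncio

from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams

async def stream_one(prompt: str, model_name: str) -> None:
    engine = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(model=model_name))
    params = SamplingParams(temperature=0.001, max_tokens=500)
    emitted = ""
    # generate() is an async generator of cumulative RequestOutput objects.
    async for request_output in engine.generate(prompt, params, request_id="0"):
        text = request_output.outputs[0].text
        print(text[len(emitted):], end="", flush=True)  # print only the new suffix
        emitted = text

asyncio.run(stream_one("Summarize: the patient reports mild chest pain.", "facebook/opt-125m"))
```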
utils/multiple_stream.py CHANGED
@@ -1,5 +1,6 @@
 import copy
 import random
+
 import gradio as gr
 
 TEST = """ Test of Time. A Benchmark for Evaluating LLMs on Temporal Reasoning. Large language models (LLMs) have