Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaya
committed on
Commit
·
25580aa
1
Parent(s):
79fbee7
Update process status and config
Browse files
- yourbench_space/app.py +12 -2
- yourbench_space/config.py +3 -4
- yourbench_space/utils.py +56 -26
yourbench_space/app.py
CHANGED
@@ -60,6 +60,16 @@ def generate_and_return(hf_org, hf_prefix):
|
|
60 |
)
|
61 |
)
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
def prepare_task(oauth_token: gr.OAuthToken | None, hf_dataset_prefix: str, _=None):
|
65 |
new_env = os.environ.copy()
|
@@ -121,7 +131,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
121 |
file_input = gr.File(
|
122 |
label="Upload text files",
|
123 |
file_count="multiple",
|
124 |
-
file_types=[".txt", ".md", ".html"],
|
125 |
)
|
126 |
output = gr.Textbox(label="Log")
|
127 |
file_input.upload(
|
@@ -170,7 +180,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
170 |
with gr.Row():
|
171 |
process_status = gr.Checkbox(label="Process Status", interactive=False)
|
172 |
status_timer = gr.Timer(1.0, active=True)
|
173 |
-
status_timer.tick(
|
174 |
|
175 |
with gr.Row():
|
176 |
start_button = gr.Button("Start Task")
|
|
|
60 |
)
|
61 |
)
|
62 |
|
63 |
+
def update_process_status():
|
64 |
+
"""Update process status and include exit details if process has terminated"""
|
65 |
+
is_running = manager.is_running()
|
66 |
+
|
67 |
+
if not is_running:
|
68 |
+
exit_code, exit_reason = manager.get_exit_details()
|
69 |
+
status_text = f"Process Status: Stopped - {exit_reason}, exit code - {exit_code}" if exit_reason else "Process Status: Stopped"
|
70 |
+
return gr.update(value=False, label=status_text)
|
71 |
+
|
72 |
+
return gr.update(value=True, label="Process Status: Running")
|
73 |
|
74 |
def prepare_task(oauth_token: gr.OAuthToken | None, hf_dataset_prefix: str, _=None):
|
75 |
new_env = os.environ.copy()
|
|
|
131 |
file_input = gr.File(
|
132 |
label="Upload text files",
|
133 |
file_count="multiple",
|
134 |
+
file_types=[".txt", ".md", ".html", ".pdf"],
|
135 |
)
|
136 |
output = gr.Textbox(label="Log")
|
137 |
file_input.upload(
|
|
|
180 |
with gr.Row():
|
181 |
process_status = gr.Checkbox(label="Process Status", interactive=False)
|
182 |
status_timer = gr.Timer(1.0, active=True)
|
183 |
+
status_timer.tick(update_process_status, outputs=process_status)
|
184 |
|
185 |
with gr.Row():
|
186 |
start_button = gr.Button("Start Task")
|
yourbench_space/config.py
CHANGED
@@ -12,16 +12,15 @@ def generate_base_config(hf_org, hf_prefix):
|
|
12 |
"hf_organization": hf_org,
|
13 |
"hf_dataset_name": hf_prefix,
|
14 |
},
|
15 |
-
"local_dataset_dir": "results/",
|
16 |
"model_list": [
|
17 |
{
|
18 |
"model_name": "meta-llama/Llama-3.3-70B-Instruct",
|
19 |
-
"provider": "
|
20 |
"max_concurrent_requests": 32,
|
21 |
},
|
22 |
{
|
23 |
"model_name": "Qwen/Qwen2.5-72B-Instruct",
|
24 |
-
"provider": "
|
25 |
"max_concurrent_requests": 32,
|
26 |
}
|
27 |
],
|
@@ -76,7 +75,7 @@ def generate_base_config(hf_org, hf_prefix):
|
|
76 |
],
|
77 |
},
|
78 |
"judge_answers": {
|
79 |
-
"run":
|
80 |
"comparing_strategies": [["zeroshot", "gold"]],
|
81 |
"chunk_column_index": 0,
|
82 |
"random_seed": 42,
|
|
|
12 |
"hf_organization": hf_org,
|
13 |
"hf_dataset_name": hf_prefix,
|
14 |
},
|
|
|
15 |
"model_list": [
|
16 |
{
|
17 |
"model_name": "meta-llama/Llama-3.3-70B-Instruct",
|
18 |
+
"provider": "novita",
|
19 |
"max_concurrent_requests": 32,
|
20 |
},
|
21 |
{
|
22 |
"model_name": "Qwen/Qwen2.5-72B-Instruct",
|
23 |
+
"provider": "novita",
|
24 |
"max_concurrent_requests": 32,
|
25 |
}
|
26 |
],
|
|
|
75 |
],
|
76 |
},
|
77 |
"judge_answers": {
|
78 |
+
"run": False, # to change when fixed
|
79 |
"comparing_strategies": [["zeroshot", "gold"]],
|
80 |
"chunk_column_index": 0,
|
81 |
"random_seed": 42,
|
yourbench_space/utils.py
CHANGED
@@ -19,9 +19,8 @@ STAGES = [
|
|
19 |
"summarization",
|
20 |
"chunking",
|
21 |
"single_shot_question_generation",
|
22 |
-
"multi_hop_question_generation",
|
23 |
"answer_generation",
|
24 |
-
"judge_answers",
|
25 |
]
|
26 |
|
27 |
|
@@ -56,6 +55,7 @@ class SubprocessManager:
|
|
56 |
self.command = command
|
57 |
self.process = None
|
58 |
self.output_stream = io.StringIO()
|
|
|
59 |
|
60 |
def start_process(self, custom_env: dict | None):
|
61 |
"""Start the subprocess."""
|
@@ -64,20 +64,30 @@ class SubprocessManager:
|
|
64 |
return
|
65 |
|
66 |
self.output_stream = io.StringIO()
|
67 |
-
self.
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
def read_and_get_output(self):
|
80 |
"""Read subprocess output, capture it, and return log and completed stages."""
|
|
|
|
|
|
|
81 |
if self.process and self.process.stdout:
|
82 |
try:
|
83 |
while True:
|
@@ -89,10 +99,9 @@ class SubprocessManager:
|
|
89 |
except BlockingIOError:
|
90 |
pass
|
91 |
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
)
|
96 |
return current_output, completed_stages
|
97 |
|
98 |
def stop_process(self):
|
@@ -101,10 +110,13 @@ class SubprocessManager:
|
|
101 |
logger.info("Process is not running")
|
102 |
return
|
103 |
logger.info("Sending SIGTERM to the Process")
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
108 |
|
109 |
def kill_process(self):
|
110 |
"""Forcefully kill the subprocess"""
|
@@ -112,11 +124,29 @@ class SubprocessManager:
|
|
112 |
logger.info("Process is not running")
|
113 |
return
|
114 |
logger.info("Sending SIGKILL to the Process")
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
|
|
|
|
119 |
|
120 |
def is_running(self):
|
121 |
"""Check if the subprocess is still running"""
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"summarization",
|
20 |
"chunking",
|
21 |
"single_shot_question_generation",
|
|
|
22 |
"answer_generation",
|
23 |
+
# "judge_answers", # to uncomment when fixed
|
24 |
]
|
25 |
|
26 |
|
|
|
55 |
self.command = command
|
56 |
self.process = None
|
57 |
self.output_stream = io.StringIO()
|
58 |
+
self.exit_code = None
|
59 |
|
60 |
def start_process(self, custom_env: dict | None):
|
61 |
"""Start the subprocess."""
|
|
|
64 |
return
|
65 |
|
66 |
self.output_stream = io.StringIO()
|
67 |
+
self.exit_code = None
|
68 |
+
|
69 |
+
try:
|
70 |
+
logger.info(f"Starting process with command: {' '.join(self.command)}")
|
71 |
+
self.process = subprocess.Popen(
|
72 |
+
self.command,
|
73 |
+
stdout=subprocess.PIPE,
|
74 |
+
stderr=subprocess.STDOUT, # Combine stderr with stdout
|
75 |
+
text=True,
|
76 |
+
bufsize=1,
|
77 |
+
start_new_session=True,
|
78 |
+
env=custom_env,
|
79 |
+
)
|
80 |
+
os.set_blocking(self.process.stdout.fileno(), False)
|
81 |
+
logger.info(f"Started process with PID: {self.process.pid}")
|
82 |
+
except Exception as e:
|
83 |
+
logger.error(f"Failed to start process: {str(e)}")
|
84 |
+
return
|
85 |
|
86 |
def read_and_get_output(self):
|
87 |
"""Read subprocess output, capture it, and return log and completed stages."""
|
88 |
+
current_output = ""
|
89 |
+
completed_stages = []
|
90 |
+
|
91 |
if self.process and self.process.stdout:
|
92 |
try:
|
93 |
while True:
|
|
|
99 |
except BlockingIOError:
|
100 |
pass
|
101 |
|
102 |
+
current_output = self.output_stream.getvalue()
|
103 |
+
completed_stages = list(set(re.findall(r"Successfully completed stage: (\w+)", current_output)))
|
104 |
+
|
|
|
105 |
return current_output, completed_stages
|
106 |
|
107 |
def stop_process(self):
|
|
|
110 |
logger.info("Process is not running")
|
111 |
return
|
112 |
logger.info("Sending SIGTERM to the Process")
|
113 |
+
try:
|
114 |
+
self.process.terminate()
|
115 |
+
self.exit_code = self.process.wait(timeout=5) # Wait up to 5 seconds for process to terminate
|
116 |
+
logger.info(f"Process terminated by user with exit code {self.exit_code}")
|
117 |
+
except subprocess.TimeoutExpired:
|
118 |
+
logger.warning("Process did not terminate within timeout, sending SIGKILL")
|
119 |
+
self.kill_process()
|
120 |
|
121 |
def kill_process(self):
|
122 |
"""Forcefully kill the subprocess"""
|
|
|
124 |
logger.info("Process is not running")
|
125 |
return
|
126 |
logger.info("Sending SIGKILL to the Process")
|
127 |
+
try:
|
128 |
+
self.process.kill()
|
129 |
+
self.exit_code = self.process.wait(timeout=5) # Wait up to 5 seconds for process to be killed
|
130 |
+
logger.info(f"Process killed by user with exit code {self.exit_code}")
|
131 |
+
except subprocess.TimeoutExpired:
|
132 |
+
logger.error("Process could not be killed within timeout")
|
133 |
|
134 |
def is_running(self):
|
135 |
"""Check if the subprocess is still running"""
|
136 |
+
if self.process is None:
|
137 |
+
return False
|
138 |
+
|
139 |
+
return self.process.poll() is None
|
140 |
+
|
141 |
+
def get_exit_details(self):
|
142 |
+
"""Return exit code and reason if process has terminated"""
|
143 |
+
if self.process is None:
|
144 |
+
return None, "Process was never started"
|
145 |
+
|
146 |
+
if self.is_running():
|
147 |
+
return None, "Process is still running"
|
148 |
+
|
149 |
+
if not self.exit_code is None and self.exit_code != 0 :
|
150 |
+
return self.exit_code, "Process exited abnormaly"
|
151 |
+
|
152 |
+
return self.exit_code, "Process exited normaly"
|