Clémentine committed
Commit 8ac5b07 · 1 Parent(s): f176095

update oauth to follow the session
yourbench_space/app.py CHANGED
@@ -108,7 +108,7 @@ def run_evaluation_pipeline(oauth_token: gr.OAuthToken | None, org_name, eval_name
     eval_ds_name = f"{org_name}/{eval_name}"
     # Test dataset existence
     try:
-        load_dataset(eval_ds_name, streaming=True)
+        load_dataset(eval_ds_name, streaming=True, token=oauth_token.token)
     except Exception as e:
         print(f"Error while loading the dataset: {e}")
         return
@@ -232,7 +232,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
         answers_df = gr.DataFrame()

         stages_table.change(
-            update_dataset, inputs=[stages_table, hf_org_dropdown, hf_dataset_name], outputs=[ingestion_df, summarization_df, single_hop, answers_df]
+            update_dataset, inputs=[stages_table, hf_org_dropdown, hf_dataset_name, login_btn], outputs=[ingestion_df, summarization_df, single_hop, answers_df]
         )

         log_timer = gr.Timer(1.0, active=True)
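For context, a minimal sketch of the wiring this app.py change relies on (component and function names below are illustrative, not from the repo): the event handler declares a parameter typed gr.OAuthToken, the gr.LoginButton is listed among the event inputs, and the session token is forwarded to load_dataset so private or gated repos resolve for the logged-in user.

# Minimal sketch, assuming a Space with OAuth enabled; names are hypothetical.
import gradio as gr
import pandas as pd
from datasets import load_dataset

def preview_dataset(dataset_name: str, oauth_token: gr.OAuthToken):
    # Forward the session token so private datasets load for the signed-in user.
    ds = load_dataset(dataset_name, split="train", streaming=True, token=oauth_token.token)
    return pd.DataFrame([next(iter(ds)) for _ in range(5)])

with gr.Blocks() as demo:
    login_btn = gr.LoginButton()
    dataset_box = gr.Textbox(label="Dataset name")
    preview_df = gr.DataFrame()
    # Same pattern as the stages_table.change() call above: the login button is
    # passed as an input so the handler receives the OAuth token of the session.
    dataset_box.change(preview_dataset, inputs=[dataset_box, login_btn], outputs=[preview_df])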
yourbench_space/utils.py CHANGED
@@ -52,7 +52,7 @@ def save_files(uuid: str, files: List[pathlib.Path]) -> str:
         else "No files were saved"
     )

-def update_dataset(stages, hf_org, hf_prefix):
+def update_dataset(stages, hf_org, hf_prefix, oauth_token: gr.OAuthToken):
     """
     Updates the dataset based on the provided stages and dataset configuration.
     """
@@ -66,16 +66,16 @@ def update_dataset(stages, hf_org, hf_prefix):

     if "ingestion" in stages:
         # TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
-        ingestion_ds = load_dataset(dataset_name, name="ingested", split="train").select_columns("document_text")
+        ingestion_ds = load_dataset(dataset_name, name="ingested", split="train", token=oauth_token.token).select_columns("document_text")
         ingestion_df = pd.DataFrame(ingestion_ds[0]) # only one row
     if "summarization" in stages:
-        summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
+        summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True, token=oauth_token.token).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
         summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])
     if "single_shot_question_generation" in stages:
-        single_hop_ds = load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True)
+        single_hop_ds = load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True, token=oauth_token.token)
         single_hop_df = pd.DataFrame([next(iter(single_hop_ds)) for _ in range(5)])
     if "answer_generation" in stages:
-        answers_ds = load_dataset(dataset_name, name="answer_generation", split="train", streaming=True)
+        answers_ds = load_dataset(dataset_name, name="answer_generation", split="train", streaming=True, token=oauth_token.token)
         answers_df = pd.DataFrame([next(iter(answers_ds)) for _ in range(5)])

     return (ingestion_df, summarization_df, single_hop_df, answers_df)
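A note on the utils.py side of the change: datasets.load_dataset accepts a token argument, so threading oauth_token.token through update_dataset lets the preview read datasets that are private to the logged-in user or org, while the existing streaming=True keeps the app from downloading whole splits just to display a handful of rows.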