Clémentine committed on
Commit
ab1227e
·
1 Parent(s): be6a58f

only show relevant columns as eval goes on

Browse files
Files changed (1) hide show
  1. yourbench_space/utils.py +3 -2
yourbench_space/utils.py CHANGED
@@ -67,12 +67,13 @@ def update_dataset(stages, hf_org, hf_prefix):
67
 
68
  # TODO: add cache dir
69
  # Will be able to group everything in one pass once the names get homogeneized
 
70
  if "ingestion" in stages:
71
  # TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
72
- ingestion_ds = load_dataset(dataset_name, name="ingested", split="train", streaming=True)
73
  ingestion_df = pd.DataFrame([next(iter(ingestion_ds)) for _ in range(5)])
74
  if "summarization" in stages:
75
- summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True)
76
  summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])
77
  if "single_shot_question_generation" in stages:
78
  single_hop_ds = load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True)
 
67
 
68
  # TODO: add cache dir
69
  # Will be able to group everything in one pass once the names get homogeneized
70
+ # TODO: make sure the questions are loaded with a set
71
  if "ingestion" in stages:
72
  # TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
73
+ ingestion_ds = load_dataset(dataset_name, name="ingested", split="train").select_columns("document_text")
74
  ingestion_df = pd.DataFrame([next(iter(ingestion_ds)) for _ in range(5)])
75
  if "summarization" in stages:
76
+ summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
77
  summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])
78
  if "single_shot_question_generation" in stages:
79
  single_hop_ds = load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True)