Spaces:
Sleeping
Sleeping
Clémentine
committed on
Commit
·
ab1227e
1
Parent(s):
be6a58f
only show relevant columns as eval goes on
Browse files
- yourbench_space/utils.py +3 -2
yourbench_space/utils.py
CHANGED
@@ -67,12 +67,13 @@ def update_dataset(stages, hf_org, hf_prefix):
|
|
67 |
|
68 |
# TODO: add cache dir
|
69 |
# Will be able to group everything in one pass once the names get homogeneized
|
|
|
70 |
if "ingestion" in stages:
|
71 |
# TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
|
72 |
-
ingestion_ds = load_dataset(dataset_name, name="ingested", split="train"
|
73 |
ingestion_df = pd.DataFrame([next(iter(ingestion_ds)) for _ in range(5)])
|
74 |
if "summarization" in stages:
|
75 |
-
summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True)
|
76 |
summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])
|
77 |
if "single_shot_question_generation" in stages:
|
78 |
single_hop_ds = load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True)
|
|
|
67 |
|
68 |
# TODO: add cache dir
|
69 |
# Will be able to group everything in one pass once the names get homogeneized
|
70 |
+
# TODO: make sure the questions are loaded with a set
|
71 |
if "ingestion" in stages:
|
72 |
# TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
|
73 |
+
ingestion_ds = load_dataset(dataset_name, name="ingested", split="train").select_columns("document_text")
|
74 |
ingestion_df = pd.DataFrame([next(iter(ingestion_ds)) for _ in range(5)])
|
75 |
if "summarization" in stages:
|
76 |
+
summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
|
77 |
summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])
|
78 |
if "single_shot_question_generation" in stages:
|
79 |
single_hop_ds = load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True)
|