Spaces:
Sleeping
Sleeping
Clémentine
committed on
Commit
·
5b3b8d7
1
Parent(s):
89c2cb6
last ds load fix
Browse files
- yourbench_space/utils.py +2 -2
yourbench_space/utils.py
CHANGED
@@ -66,8 +66,8 @@ def update_dataset(stages: list, hf_org: str, hf_prefix: str, oauth_token: gr.OA
|
|
66 |
|
67 |
if "ingestion" in stages:
|
68 |
# TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
|
69 |
-
ingestion_ds = load_dataset(dataset_name, name="ingested", split="train", token=oauth_token.token).select_columns("document_text")
|
70 |
-
ingestion_df = pd.DataFrame([ingestion_ds
|
71 |
if "summarization" in stages:
|
72 |
summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True, token=oauth_token.token).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
|
73 |
summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])
|
|
|
66 |
|
67 |
if "ingestion" in stages:
|
68 |
# TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
|
69 |
+
ingestion_ds = load_dataset(dataset_name, name="ingested", split="train", streaming=True, token=oauth_token.token).select_columns("document_text")
|
70 |
+
ingestion_df = pd.DataFrame([next(iter(ingestion_ds)) for _ in range(1)]) # only one row
|
71 |
if "summarization" in stages:
|
72 |
summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True, token=oauth_token.token).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
|
73 |
summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])
|