advanced

Sleeping

Clémentine committed on Mar 20

Commit

5b3b8d7

1 Parent(s): 89c2cb6

last ds load fix

Files changed (1) hide show

yourbench_space/utils.py CHANGED Viewed

@@ -66,8 +66,8 @@ def update_dataset(stages: list, hf_org: str, hf_prefix: str, oauth_token: gr.OA
     if "ingestion" in stages:
         # TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
-        ingestion_ds = load_dataset(dataset_name, name="ingested", split="train", token=oauth_token.token).select_columns("document_text")
-        ingestion_df = pd.DataFrame([ingestion_ds[0]]) # only one row
     if "summarization" in stages:
         summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True, token=oauth_token.token).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
         summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])

     if "ingestion" in stages:
         # TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
+        ingestion_ds = load_dataset(dataset_name, name="ingested", split="train", streaming=True, token=oauth_token.token).select_columns("document_text")
+        ingestion_df = pd.DataFrame([next(iter(ingestion_ds)) for _ in range(1)]) # only one row
     if "summarization" in stages:
         summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True, token=oauth_token.token).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
         summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])