Spaces:
Runtime error
Runtime error
File size: 1,572 Bytes
f44b91f 1f2b32e 38481ea 1f2b32e c4cc6f7 5473cc8 fed2521 38481ea 0d45303 6419e69 cfb4b98 05df1af 348fb48 d811903 348fb48 d811903 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
from datasets import load_dataset, load_from_disk

# Load the "allocine" dataset by name; the result is iterated per split
# further down in this script.
raw_datasets = load_dataset("allocine")

# Persist every split to a local directory in Arrow format.
raw_datasets.save_to_disk("my-arrow-datasets")

# A directory written by save_to_disk() is read back with load_from_disk(),
# not with load_dataset().
arrow_datasets_reloaded = load_from_disk("my-arrow-datasets")

# A bare expression only displays its value in a notebook/REPL; print it so
# the summary is also visible when this file runs as a script.
print(arrow_datasets_reloaded)
# Export each split to its own CSV file and record the path under the split
# name, so the reload below reads exactly the files that were just written
# instead of relying on a second, hand-maintained list of filenames.
data_files = {}
for split, dataset in raw_datasets.items():
    csv_path = f"my-dataset-{split}.csv"
    # index=False: do not write the row index as an extra CSV column.
    dataset.to_csv(csv_path, index=False)
    data_files[split] = csv_path

# Reload the CSV files, one split per entry of the mapping.
csv_datasets_reloaded = load_dataset("csv", data_files=data_files)

# Print the summary so it is visible outside a notebook/REPL.
print(csv_datasets_reloaded)
# Export each split once as JSON Lines and once as Parquet in a single pass,
# recording every written path under its split name so the reload mappings
# cannot drift from the filenames used for the export.
json_data_files = {}
parquet_data_files = {}
for split, dataset in raw_datasets.items():
    jsonl_path = f"my-dataset-{split}.jsonl"
    parquet_path = f"my-dataset-{split}.parquet"

    dataset.to_json(jsonl_path)
    dataset.to_parquet(parquet_path)

    json_data_files[split] = jsonl_path
    parquet_data_files[split] = parquet_path

# Reload both exports with the matching generic loaders.
json_datasets_reloaded = load_dataset("json", data_files=json_data_files)
parquet_datasets_reloaded = load_dataset("parquet", data_files=parquet_data_files)
|