File size: 376 Bytes
82bc972
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
# Prerequisite: activate the environment first with `conda activate emilia`.
from datasets import load_dataset
import fire
def main(root: str = "/data/scratch/pyp/datasets/emilia") -> None:
    """Load the ``en`` split of the Emilia dataset using ``root`` as cache.

    Calls ``load_dataset`` in non-streaming mode for the
    ``amphion/Emilia-Dataset`` repository at a pinned revision, restricted
    to files matching ``EN/*.tar.gz*``. The loaded dataset object is not
    kept or returned; the call is made for its side effect of filling the
    cache directory (presumably so later jobs can read it from disk --
    confirm against downstream usage).

    Args:
        root: Directory handed to ``load_dataset`` as ``cache_dir``.
    """
    # Glob for the archive shards under the repository's EN/ folder.
    path = "EN/*.tar.gz*"
    repo_id = "amphion/Emilia-Dataset"
    # Pinned commit so repeated runs resolve the same set of files.
    pinned_revision = "fc71e07e8572f5f3be1dbd02ed3172a4d298f152"

    # Result intentionally discarded: only the on-disk cache matters here.
    load_dataset(
        repo_id,
        data_files={"en": path},
        split="en",
        streaming=False,
        revision=pinned_revision,
        cache_dir=root,
    )

if __name__ == "__main__":
    # Expose `main` as a command-line interface; its `root` parameter
    # becomes an optional CLI argument handled by Fire.
    fire.Fire(component=main)