krishna-k's picture
Upload folder using huggingface_hub
06555b5 verified
import errno
import logging
import os
import sys
from typing import Iterator
from urllib.request import urlopen
# Module-level logger; cached_download() reports each download through it.
log = logging.getLogger(__name__)
def iter_data_dirs(check_writable: bool = False) -> Iterator[str]:
    """Yield candidate dataset directories, in order of preference.

    The order is:

    1. ``$PYAV_TESTDATA_DIR``, if that environment variable is set (it is
       yielded as-is and never checked for writability);
    2. on Windows, only ``<sys.prefix>/pyav/datasets``, after which iteration
       stops;
    3. otherwise ``<base>/pyav/datasets`` for the active virtual environment
       (when there is one) followed by a fixed list of system locations;
    4. finally ``~/.pyav/datasets``, which is always yielded.

    :param check_writable: When true, skip any ``<base>/pyav/datasets``
        candidate from step 3 that is not writable. If the directory does not
        exist yet, the writability of ``<base>`` is checked instead.
    :return: An iterator over directory paths. The paths are not created and
        are not guaranteed to exist.
    """
    env_dir = os.environ.get("PYAV_TESTDATA_DIR")
    if env_dir is not None:
        yield env_dir

    if os.name == "nt":
        yield os.path.join(sys.prefix, "pyav", "datasets")
        return

    bases = [
        "/usr/local/share",
        "/usr/local/lib",
        "/usr/share",
        "/usr/lib",
    ]

    # Prefer the local virtual environment. Legacy ``virtualenv`` sets
    # ``sys.real_prefix``; PEP 405 environments (the stdlib ``venv`` module)
    # instead leave ``sys.base_prefix`` pointing at the base installation, so
    # it differs from ``sys.prefix``. Checking only ``real_prefix`` would miss
    # the latter.
    in_virtualenv = hasattr(sys, "real_prefix") or (
        getattr(sys, "base_prefix", sys.prefix) != sys.prefix
    )
    if in_virtualenv:
        bases.insert(0, sys.prefix)

    for base in bases:
        dir_ = os.path.join(base, "pyav", "datasets")
        if check_writable:
            # An existing dataset directory must itself be writable; a missing
            # one is judged by the writability of its base.
            probe = dir_ if os.path.exists(dir_) else base
            if not os.access(probe, os.W_OK):
                continue
        yield dir_

    yield os.path.join(os.path.expanduser("~"), ".pyav", "datasets")
def cached_download(url: str, name: str) -> str:
    """Download the data at a URL, and cache it under the given name.

    Every directory produced by :func:`iter_data_dirs` is searched for an
    existing copy first; if one is found its path is returned without any
    network access.

    Otherwise the file is downloaded into the first directory produced by
    ``iter_data_dirs(True)``, i.e. :envvar:`PYAV_TESTDATA_DIR` if set, or the
    first writeable ``pyav/datasets`` directory under:

    - the current virtualenv
    - ``/usr/local/share``
    - ``/usr/local/lib``
    - ``/usr/share``
    - ``/usr/lib``

    falling back to ``~/.pyav/datasets`` in the user's home.

    :param url: The URL to fetch.
    :param name: Path, relative to the data directory, to cache the file under.
        It must already be normalized (``os.path.normpath(name) == name``).
    :return: The path to the cached file.
    :raises ValueError: If ``name`` is not normalized, or the response status
        is not 200.
    """
    clean_name = os.path.normpath(name)
    if clean_name != name:
        raise ValueError(f"{name} is not normalized.")

    # Serve from any existing cache location before touching the network.
    for dir_ in iter_data_dirs():
        path = os.path.join(dir_, name)
        if os.path.exists(path):
            return path

    dir_ = next(iter_data_dirs(True))
    path = os.path.join(dir_, name)
    tmp_path = path + ".tmp"

    log.info("Downloading %s to %s", url, path)

    # The context manager guarantees the connection is closed on every path.
    with urlopen(url) as response:
        if response.getcode() != 200:
            raise ValueError(f"HTTP {response.getcode()}")

        os.makedirs(os.path.dirname(path), exist_ok=True)

        try:
            # Write to a temporary name first so that a partial download is
            # never visible under the final name.
            with open(tmp_path, "wb") as fh:
                for chunk in iter(lambda: response.read(8192), b""):
                    fh.write(chunk)
            # os.replace overwrites an existing destination on every platform,
            # whereas os.rename fails on Windows if the destination exists.
            os.replace(tmp_path, path)
        except BaseException:
            # Best-effort removal of the partial file; the original error is
            # what the caller needs to see.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            raise

    return path
def fate(name: str) -> str:
    """Fetch a sample from the FFmpeg test suite and return its local path.

    ``name`` is the sample's ``/``-separated path within the suite. The file is
    retrieved and cached by :func:`cached_download` under ``fate-suite``.

    See the `FFmpeg Automated Test Environment <https://www.ffmpeg.org/fate.html>`_
    """
    source_url = "http://fate.ffmpeg.org/fate-suite/" + name
    # URL separators become the platform's path separator for the cache path.
    local_name = name.replace("/", os.path.sep)
    cache_name = os.path.join("fate-suite", local_name)
    return cached_download(source_url, cache_name)
def curated(name: str) -> str:
    """Fetch a sample curated by the PyAV developers and return its local path.

    ``name`` is the sample's ``/``-separated path within the curated set. The
    file is retrieved and cached by :func:`cached_download` under
    ``pyav-curated``.
    """
    source_url = "https://pyav.org/datasets/" + name
    # URL separators become the platform's path separator for the cache path.
    local_name = name.replace("/", os.path.sep)
    cache_name = os.path.join("pyav-curated", local_name)
    return cached_download(source_url, cache_name)