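"""Pre-download every Hugging Face model and tokenizer used by the task
interfaces, with small helpers for monitoring disk space on the host."""
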
import os
import shutil
import subprocess

from transformers import AutoTokenizer, AutoModelForSequenceClassification

from interfaces.cap import languages as languages_cap
from interfaces.cap import domains as domains_cap

from interfaces.emotion9 import languages as languages_emotion9

from interfaces.illframes import domains as domains_illframes

from interfaces.cap import build_huggingface_path as hf_cap_path
from interfaces.cap_minor import build_huggingface_path as hf_cap_minor_path
from interfaces.manifesto import build_huggingface_path as hf_manifesto_path
from interfaces.sentiment import build_huggingface_path as hf_sentiment_path
from interfaces.emotion import build_huggingface_path as hf_emotion_path
from interfaces.emotion9 import build_huggingface_path as hf_emotion9_path
from interfaces.illframes import build_huggingface_path as hf_illframes_path
from interfaces.ontolisst import build_huggingface_path as hf_ontolisst_path

# read-only Hugging Face access token, expected in the "hf_read" env variable
HF_TOKEN = os.environ["hf_read"]

# Temporary solution: the path builders are called with empty identifiers to
# address the base repo of each single-model task
models = [hf_manifesto_path(""), hf_sentiment_path(""), hf_emotion_path(""), hf_cap_minor_path("", ""), hf_ontolisst_path("")]

# CAP is trickier: a separate model exists for every (language, domain) pair
for language in languages_cap:
    for domain in domains_cap.values():
        models.append(hf_cap_path(language, domain))
        
# emotion9 models exist per language
for language in languages_emotion9:
    models.append(hf_emotion9_path(language))
    
# ILLFRAMES domains are stored in a dict; only the values (domain names) matter
for domain in domains_illframes.values():
    models.append(hf_illframes_path(domain))

# tokenizers to prefetch alongside the models
tokenizers = ["xlm-roberta-large"]

def download_hf_models():
    """Download all models and tokenizers into the local Hugging Face cache."""
    for model_id in models:
        AutoModelForSequenceClassification.from_pretrained(
            model_id,
            low_cpu_mem_usage=True,
            device_map="auto",
            offload_folder="offload",
            token=HF_TOKEN,
        )
    for tokenizer_id in tokenizers:
        AutoTokenizer.from_pretrained(tokenizer_id)
        
        
def df_h():
    """Print human-readable disk usage for all mounted filesystems (df -H)."""
    result = subprocess.run(["df", "-H"], capture_output=True, text=True)
    print(result.stdout)
    
    
def set_hf_cache_dir(path: str):
    """Redirect the Hugging Face and Torch caches to `path`.

    Note: transformers resolves its cache location at import time, so this
    only affects libraries imported after the call.
    """
    os.environ["TRANSFORMERS_CACHE"] = path
    os.environ["HF_HOME"] = path
    os.environ["HF_DATASETS_CACHE"] = path
    os.environ["TORCH_HOME"] = path


def is_disk_full(min_free_space_in_GB=10):
    """Return True if the root filesystem has less free space than the threshold."""
    total, used, free = shutil.disk_usage("/")
    free_gb = free / (1024 ** 3)
    return free_gb < min_free_space_in_GB
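

# Minimal usage sketch (an assumption, not part of the original deployment):
# check free space first, then prefetch everything. The 50 GB threshold is a
# hypothetical value. Note that set_hf_cache_dir() must run before transformers
# is imported to influence where these downloads land.
if __name__ == "__main__":
    df_h()  # log disk usage before downloading
    if is_disk_full(min_free_space_in_GB=50):
        raise SystemExit("Not enough free disk space for model downloads.")
    download_hf_models()
    df_h()  # confirm remaining space afterwards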