Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,222 Bytes
1938217 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import transformers
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class ModelArguments:
model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
version: Optional[str] = field(default="v0")
freeze_backbone: bool = field(default=False)
tune_speech_projector: bool = field(default=False)
tune_speech_encoder: bool = field(default=False)
tune_speech_generator_only: bool = field(default=False)
speech_encoder_type: Optional[str] = field(default=None)
speech_encoder: Optional[str] = field(default=None)
pretrain_speech_projector: Optional[str] = field(default=None)
speech_projector_type: Optional[str] = field(default='linear')
speech_encoder_ds_rate: int = 5
speech_encoder_hidden_size: int = 1280
@dataclass
class DataArguments:
data_path: str = field(default=None,
metadata={"help": "Path to the training data."})
is_multimodal: bool = False
input_type: str = field(default="mel")
speech_normalize: bool = False
mel_size: int = 128
has_tgt_units: bool = False
@dataclass
class TrainingArguments(transformers.TrainingArguments):
cache_dir: Optional[str] = field(default=None)
optim: str = field(default="adamw_torch")
freeze_speech_projector: bool = field(default=False)
model_max_length: int = field(
default=512,
metadata={
"help":
"Maximum sequence length. Sequences will be right padded (and possibly truncated)."
},
)
double_quant: bool = field(
default=True,
metadata={"help": "Compress the quantization statistics through double quantization."}
)
quant_type: str = field(
default="nf4",
metadata={"help": "Quantization data type to use. Should be one of `fp4` or `nf4`."}
)
bits: int = field(
default=16,
metadata={"help": "How many bits to use."}
)
lora_enable: bool = False
lora_r: int = 64
lora_alpha: int = 16
lora_dropout: float = 0.05
lora_weight_path: str = ""
lora_bias: str = "none"
speech_projector_lr: Optional[float] = None
group_by_modality_length: bool = field(default=False) |