Tags: Text Generation · Transformers · PyTorch · chatts · feature-extraction · conversational · custom_code
xiezhe24 and alexanderchemeris committed · verified
Commit db0db5e · Parent(s): b54f0d8

Update processing_qwen2_ts.py to allow text-only processing (#6)


- Update processing_qwen2_ts.py to allow text-only processing (4f719ca3c9dac4de03a9f244602ca966f94e1926)


Co-authored-by: Alexander Chemeris <[email protected]>

Files changed (1):
  processing_qwen2_ts.py  +8 -11
processing_qwen2_ts.py CHANGED

@@ -19,11 +19,7 @@ import torch
 
 from transformers.feature_extraction_utils import BatchFeature
 from transformers.processing_utils import ProcessorMixin
-from transformers.tokenization_utils_base import (
-    PreTokenizedInput,
-    TextInput,
-    PaddingStrategy,
-)
+from transformers.tokenization_utils_base import PaddingStrategy
 
 def sp_encoding(timeseries: np.ndarray, eots_token: bool = True) -> Tuple[np.ndarray, str, dict]:
     """
@@ -70,8 +66,8 @@ class Qwen2TSProcessor(ProcessorMixin):
 
     def __call__(
         self,
-        text: List[str],
-        timeseries: List[List[np.ndarray]],
+        text: Union[str, List[str]],
+        timeseries: Optional[List[List[np.ndarray]]] = None,
         padding: Union[bool, str, PaddingStrategy] = False,
         padding_side: str = 'left',
         vllm_flag: bool = False,
@@ -92,6 +88,8 @@
         """
         if type(text) == str:
             text = [text]
+        if timeseries is None:
+            timeseries = []
 
         encoded_ts_arrays = []
         reconstructed_prompts = []
@@ -139,10 +137,9 @@
         tokenizer_outputs = self.tokenizer(reconstructed_prompts, padding=padding, padding_side=padding_side, **kwargs)
 
         # Create the final output
-        outputs = {
-            "timeseries": concatenated_ts
-        }
-        outputs.update(tokenizer_outputs)
+        outputs = tokenizer_outputs
+        if concatenated_ts is not None:
+            outputs["timeseries"] = concatenated_ts
 
         return BatchFeature(data=outputs)
 
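With this change, the processor can be called with a prompt alone, and the "timeseries" key is only added to the returned BatchFeature when time-series data are actually supplied. Below is a minimal usage sketch; the checkpoint id and prompt strings are illustrative assumptions and are not part of this commit.

    import numpy as np
    from transformers import AutoProcessor

    # Assumed checkpoint id, shown only for illustration; this commit changes the processor code only.
    processor = AutoProcessor.from_pretrained(
        "bytedance-research/ChatTS-14B",
        trust_remote_code=True,
    )

    # Text-only: `timeseries` may now be omitted, and the output holds only the tokenizer fields.
    text_only = processor(text="Briefly describe what a moving average does.", padding=True)

    # With time series (unchanged behaviour): one list of arrays per prompt, where each prompt
    # contains the time-series placeholder handled by sp_encoding() in processing_qwen2_ts.py.
    # ts_inputs = processor(
    #     text=[prompt_with_ts_placeholder],
    #     timeseries=[[np.arange(64, dtype=np.float32)]],
    #     padding=True,
    # )

Keeping the "timeseries" entry optional also means downstream code can distinguish text-only batches from mixed batches simply by checking for that key in the BatchFeature.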