Update processing_qwen2_ts.py to allow text-only processing (#6)

- Update processing_qwen2_ts.py to allow text-only processing (4f719ca3c9dac4de03a9f244602ca966f94e1926)

Co-authored-by: Alexander Chemeris <[email protected]>

Files changed (1)

processing_qwen2_ts.py (changed)

@@ -19,11 +19,7 @@ import torch
 from transformers.feature_extraction_utils import BatchFeature
 from transformers.processing_utils import ProcessorMixin
-from transformers.tokenization_utils_base import (
-    PreTokenizedInput,
-    TextInput,
-    PaddingStrategy,
-)
 def sp_encoding(timeseries: np.ndarray, eots_token: bool = True) -> Tuple[np.ndarray, str, dict]:
     """
@@ -70,8 +66,8 @@ class Qwen2TSProcessor(ProcessorMixin):
     def __call__(
         self,
-        text: List[str],
-        timeseries: List[List[np.ndarray]],
         padding: Union[bool, str, PaddingStrategy] = False,
         padding_side: str = 'left',
         vllm_flag: bool = False,
@@ -92,6 +88,8 @@ class Qwen2TSProcessor(ProcessorMixin):
         """
         if type(text) == str:
             text = [text]
         encoded_ts_arrays = []
         reconstructed_prompts = []
@@ -139,10 +137,9 @@ class Qwen2TSProcessor(ProcessorMixin):
             tokenizer_outputs = self.tokenizer(reconstructed_prompts, padding=padding, padding_side=padding_side, **kwargs)
         # Create the final output
-        outputs = {
-            "timeseries": concatenated_ts
-        }
-        outputs.update(tokenizer_outputs)
         return BatchFeature(data=outputs)

 from transformers.feature_extraction_utils import BatchFeature
 from transformers.processing_utils import ProcessorMixin
+from transformers.tokenization_utils_base import PaddingStrategy
 def sp_encoding(timeseries: np.ndarray, eots_token: bool = True) -> Tuple[np.ndarray, str, dict]:
     """
     def __call__(
         self,
+        text: Union[str, List[str]],
+        timeseries: Optional[List[List[np.ndarray]]] = None,
         padding: Union[bool, str, PaddingStrategy] = False,
         padding_side: str = 'left',
         vllm_flag: bool = False,
         """
         if type(text) == str:
             text = [text]
+        if timeseries is None:
+            timeseries = []
         encoded_ts_arrays = []
         reconstructed_prompts = []
             tokenizer_outputs = self.tokenizer(reconstructed_prompts, padding=padding, padding_side=padding_side, **kwargs)
         # Create the final output
+        outputs = tokenizer_outputs
+        if concatenated_ts is not None:
+            outputs["timeseries"] = concatenated_ts
         return BatchFeature(data=outputs)