Update api.py
api.py CHANGED
@@ -28,7 +28,7 @@ def load_summarizer_models():
     return summ_tokenizer, summ_model


-
+def summ_preprocess(txt):
     txt = re.sub(r'^By \. [\w\s]+ \. ', ' ', txt) # By . Ellie Zolfagharifard .
     txt = re.sub(r'\d{1,2}\:\d\d [a-zA-Z]{3}', ' ', txt) # 10:30 EST
     txt = re.sub(r'\d{1,2} [a-zA-Z]+ \d{4}', ' ', txt) # 10 November 1990
@@ -51,7 +51,7 @@ async def summ_inference_tokenize(input_: list, n_tokens: int):


 async def summ_inference(txts: str):
-    txts = [*map(
+    txts = [*map(summ_preprocess, txts)]
     inference_tokenizer, tokenized_data = await summ_inference_tokenize(input_=txts, n_tokens=SUMM_INPUT_N_TOKENS)
     pred = summ_model.generate(**tokenized_data, max_new_tokens=SUMM_TARGET_N_TOKENS)
     result = ["" if t=="" else inference_tokenizer.decode(p, skip_special_tokens=True).strip() for t, p in zip(txts, pred)]
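For reference, below is a minimal, self-contained sketch of the preprocessing this commit wires into summ_inference. It reproduces only the three substitution rules visible in the hunk above; the real summ_preprocess in api.py may continue past the shown context, so the trailing return and the sample article string here are assumptions added only so the snippet runs on its own.

# Standalone sketch of the preprocessing step added in this commit.
# Only the three rules visible in the diff are included; the return is
# added here so the function can be exercised outside api.py.
import re

def summ_preprocess(txt):
    txt = re.sub(r'^By \. [\w\s]+ \. ', ' ', txt)         # drop leading bylines like "By . Ellie Zolfagharifard . "
    txt = re.sub(r'\d{1,2}\:\d\d [a-zA-Z]{3}', ' ', txt)  # drop clock times like "10:30 EST"
    txt = re.sub(r'\d{1,2} [a-zA-Z]+ \d{4}', ' ', txt)    # drop dates like "10 November 1990"
    return txt

# Hypothetical input built from the examples in the original comments.
sample = "By . Ellie Zolfagharifard . PUBLISHED: 10:30 EST, 10 November 1990 . Scientists have found ..."
cleaned = [*map(summ_preprocess, [sample, ""])]  # mirrors the new txts = [*map(summ_preprocess, txts)] line
print(cleaned[0])

Note that empty strings pass through the map unchanged, and the list comprehension in summ_inference already maps them to an empty summary via the t=="" check, so the new preprocessing line does not change that behaviour.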