Update api.py
api.py
CHANGED
@@ -28,7 +28,7 @@ def load_summarizer_models():
     return summ_tokenizer, summ_model


-def summ_preprocess(txt):
+async def summ_preprocess(txt):
     txt = re.sub(r'^By \. [\w\s]+ \. ', ' ', txt) # By . Ellie Zolfagharifard .
     txt = re.sub(r'\d{1,2}\:\d\d [a-zA-Z]{3}', ' ', txt) # 10:30 EST
     txt = re.sub(r'\d{1,2} [a-zA-Z]+ \d{4}', ' ', txt) # 10 November 1990
@@ -45,14 +45,14 @@ def summ_preprocess(txt):
     return txt


-def summ_inference_tokenize(input_: list, n_tokens: int):
+async def summ_inference_tokenize(input_: list, n_tokens: int):
     tokenized_data = summ_tokenizer(text=input_, max_length=SUMM_TARGET_N_TOKENS, truncation=True, padding="max_length", return_tensors="tf")
     return summ_tokenizer, tokenized_data


-def summ_inference(txts: str):
-    txts = [*map(summ_preprocess, txts)]
-    inference_tokenizer, tokenized_data = summ_inference_tokenize(input_=txts, n_tokens=SUMM_INPUT_N_TOKENS)
+async def summ_inference(txts: str):
+    txts = [*map(await summ_preprocess, txts)]
+    inference_tokenizer, tokenized_data = await summ_inference_tokenize(input_=txts, n_tokens=SUMM_INPUT_N_TOKENS)
     pred = summ_model.generate(**tokenized_data, max_new_tokens=SUMM_TARGET_N_TOKENS)
     result = ["" if t=="" else inference_tokenizer.decode(p, skip_special_tokens=True).strip() for t, p in zip(txts, pred)]
     return result
@@ -82,7 +82,7 @@ def scrape_urls(urls):
     scraped_texts = []
     scrape_errors = []
     for url in urls:
-        text, err = scrape_text(url)
+        text, err = await scrape_text(url)
         scraped_texts.append(text)
         scrape_errors.append(err)
     return scraped_texts, scrape_errors
@@ -117,7 +117,7 @@ async def read_items(q: URLList):
         api_key = request_json['key']
         _ = authenticate_key(api_key)
         scraped_texts, scrape_errors = scrape_urls(urls)
-        summaries = summ_inference(scraped_texts)
+        summaries = await summ_inference(scraped_texts)
         status_code = 200
         response_json = {'urls': urls, 'scraped_texts': scraped_texts, 'scrape_errors': scrape_errors, 'summaries': summaries, 'summarizer_error': ''}
     except Exception as e:
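A note on the new function bodies, separate from the committed diff above: `[*map(await summ_preprocess, txts)]` applies `await` to the coroutine function object itself rather than to the coroutines it returns, which raises a `TypeError` at runtime, and `await scrape_text(url)` is only valid if `scrape_urls` is itself declared `async` (its `def` line is outside this hunk, so that change is not visible here). Below is a minimal sketch, not the committed code, of how the awaited versions might look; it reuses the names from the diff (`summ_preprocess`, `summ_inference_tokenize`, `scrape_text`, `summ_model`, `SUMM_INPUT_N_TOKENS`, `SUMM_TARGET_N_TOKENS`) and assumes they behave as shown above.

async def summ_inference(txts: list) -> list:
    # Await each preprocessing coroutine individually; a coroutine *function*
    # is not awaitable, so it has to be called before being awaited.
    txts = [await summ_preprocess(t) for t in txts]
    inference_tokenizer, tokenized_data = await summ_inference_tokenize(
        input_=txts, n_tokens=SUMM_INPUT_N_TOKENS
    )
    pred = summ_model.generate(**tokenized_data, max_new_tokens=SUMM_TARGET_N_TOKENS)
    return [
        "" if t == "" else inference_tokenizer.decode(p, skip_special_tokens=True).strip()
        for t, p in zip(txts, pred)
    ]


async def scrape_urls(urls):
    # `await` is only legal inside an async def, so the enclosing function
    # must be declared async as well, and its caller then has to await it
    # (e.g. `scraped_texts, scrape_errors = await scrape_urls(urls)` in read_items).
    scraped_texts = []
    scrape_errors = []
    for url in urls:
        text, err = await scrape_text(url)
        scraped_texts.append(text)
        scrape_errors.append(err)
    return scraped_texts, scrape_errors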