ksvmuralidhar committed
Commit c3657ca · verified · 1 Parent(s): 0191756

Update api.py

Files changed (1): api.py (+7, -7)
api.py CHANGED
@@ -28,7 +28,7 @@ def load_summarizer_models():
     return summ_tokenizer, summ_model
 
 
-def summ_preprocess(txt):
+async def summ_preprocess(txt):
     txt = re.sub(r'^By \. [\w\s]+ \. ', ' ', txt) # By . Ellie Zolfagharifard .
     txt = re.sub(r'\d{1,2}\:\d\d [a-zA-Z]{3}', ' ', txt) # 10:30 EST
     txt = re.sub(r'\d{1,2} [a-zA-Z]+ \d{4}', ' ', txt) # 10 November 1990
@@ -45,14 +45,14 @@ def summ_preprocess(txt):
     return txt
 
 
-def summ_inference_tokenize(input_: list, n_tokens: int):
+async def summ_inference_tokenize(input_: list, n_tokens: int):
     tokenized_data = summ_tokenizer(text=input_, max_length=SUMM_TARGET_N_TOKENS, truncation=True, padding="max_length", return_tensors="tf")
     return summ_tokenizer, tokenized_data
 
 
-def summ_inference(txts: str):
-    txts = [*map(summ_preprocess, txts)]
-    inference_tokenizer, tokenized_data = summ_inference_tokenize(input_=txts, n_tokens=SUMM_INPUT_N_TOKENS)
+async def summ_inference(txts: str):
+    txts = [*map(await summ_preprocess, txts)]
+    inference_tokenizer, tokenized_data = await summ_inference_tokenize(input_=txts, n_tokens=SUMM_INPUT_N_TOKENS)
     pred = summ_model.generate(**tokenized_data, max_new_tokens=SUMM_TARGET_N_TOKENS)
     result = ["" if t=="" else inference_tokenizer.decode(p, skip_special_tokens=True).strip() for t, p in zip(txts, pred)]
     return result
@@ -82,7 +82,7 @@ def scrape_urls(urls):
     scraped_texts = []
     scrape_errors = []
     for url in urls:
-        text, err = scrape_text(url)
+        text, err = await scrape_text(url)
         scraped_texts.append(text)
         scrape_errors.append(err)
     return scraped_texts, scrape_errors
@@ -117,7 +117,7 @@ async def read_items(q: URLList):
         api_key = request_json['key']
         _ = authenticate_key(api_key)
         scraped_texts, scrape_errors = scrape_urls(urls)
-        summaries = summ_inference(scraped_texts)
+        summaries = await summ_inference(scraped_texts)
         status_code = 200
         response_json = {'urls': urls, 'scraped_texts': scraped_texts, 'scrape_errors': scrape_errors, 'summaries': summaries, 'summarizer_error': ''}
     except Exception as e:
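
Editor's note: as committed, the new summ_inference body has a bug. The expression [*map(await summ_preprocess, txts)] applies await to the coroutine function object itself rather than to a call, which raises TypeError ("object function can't be used in 'await' expression") the first time the endpoint runs. Below is a minimal, self-contained sketch of the per-item await pattern the change appears to intend; the preprocess and run names are illustrative stand-ins, not code from this repository:

    import asyncio

    async def preprocess(txt: str) -> str:
        # Stand-in for summ_preprocess: any per-item async transformation.
        return txt.strip().lower()

    async def run(texts: list) -> list:
        # Await each call individually. [*map(await preprocess, texts)] would
        # await the function object itself and raise TypeError instead.
        return [await preprocess(t) for t in texts]

    print(asyncio.run(run(["  Hello ", " WORLD "])))  # ['hello', 'world']

Since summ_preprocess only runs re.sub calls, an equally defensible fix would be to leave it as a plain def and keep the original map call; making pure-CPU regex work async buys nothing by itself.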
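
A second caveat: text, err = await scrape_text(url) now sits inside scrape_urls, which this diff leaves as a plain def (see the @@ -82,7 +82,7 @@ def scrape_urls(urls): hunk header and the unchanged scrape_urls(urls) call at line 119), and await outside an async def is a SyntaxError at import time. A sketch of the consistent version, under the assumption that scrape_text really is a coroutine; the stub here is hypothetical:

    import asyncio

    async def scrape_text(url):
        # Hypothetical stub standing in for the real scraper.
        return f"text from {url}", ""

    async def scrape_urls(urls):
        # Declaring the wrapper async makes the inner await legal; the caller
        # (here, the read_items endpoint) must then await scrape_urls(urls) too.
        scraped_texts, scrape_errors = [], []
        for url in urls:
            text, err = await scrape_text(url)
            scraped_texts.append(text)
            scrape_errors.append(err)
        return scraped_texts, scrape_errors

    print(asyncio.run(scrape_urls(["https://example.com"])))

Unrelated to the async change, summ_inference_tokenize still ignores its n_tokens parameter: max_length is hard-coded to SUMM_TARGET_N_TOKENS even though the call site passes n_tokens=SUMM_INPUT_N_TOKENS.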