ksvmuralidhar committed (verified)
Commit e5e59f1 · 1 Parent(s): 1fcb4cc

Update api.py

Files changed (1): api.py +28 -24
api.py CHANGED
@@ -58,36 +58,40 @@ async def summ_inference(txts: str):
     result = ["" if t=="" else inference_tokenizer.decode(p, skip_special_tokens=True).strip() for t, p in zip(txts, pred)]
     return result
 
-# def scrape_multi_process(urls):
-#     logging.warning('Entering get_news_multi_process() to extract new news articles')
-#     '''
-#     Get the data shape by parallely calculating lenght of each chunk and
-#     aggregating them to get lenght of complete training dataset
-#     '''
-#     pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
-
-#     results = []
-#     for url in urls:
-#         f = pool.apply_async(scrape_text, [url]) # asynchronously applying function to chunk. Each worker parallely begins to work on the job
-#         results.append(f) # appending result to results
-
-#     scraped_texts = []
-#     for f in results:
-#         scraped_texts.append(f.get(timeout=120))
-#     pool.close()
-#     pool.join()
-#     logging.warning('Exiting scrape_multi_process()')
-#     return scraped_texts
 
 async def scrape_urls(urls):
+    logging.warning('Entering scrape_urls()')
+    '''
+    Scrape all URLs in parallel by dispatching scrape_text() to a pool of
+    worker processes, one task per URL, then collect the results in order.
+    '''
+    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
+
+    results = []
+    for url in urls:
+        f = pool.apply_async(scrape_text, [url])  # dispatch one task per URL; each worker begins scraping in parallel
+        results.append(f)  # keep the AsyncResult handles so results can be fetched in order
+
     scraped_texts = []
     scrape_errors = []
-    for url in urls:
-        text, err = await scrape_text(url)
-        scraped_texts.append(text)
-        scrape_errors.append(err)
+    for f in results:
+        t, e = f.get(timeout=120)  # block for up to 120 s per task
+        scraped_texts.append(t)
+        scrape_errors.append(e)
+    pool.close()
+    pool.join()
+    logging.warning('Exiting scrape_urls()')
     return scraped_texts, scrape_errors
 
+# async def scrape_urls(urls):
+#     scraped_texts = []
+#     scrape_errors = []
+#     for url in urls:
+#         text, err = await scrape_text(url)
+#         scraped_texts.append(text)
+#         scrape_errors.append(err)
+#     return scraped_texts, scrape_errors
+
 ##### API #####
 app = FastAPI()
 summ_tokenizer, summ_model = load_summarizer_models()
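
A review note on the rewrite: Pool.apply_async() expects a plain, picklable callable, yet the sequential version being replaced awaited scrape_text(), which implies it is an async def coroutine function. Handing it to a worker process as-is would leave each worker holding an unawaited coroutine rather than a (text, error) tuple. Below is a minimal sketch of one way to bridge the two, assuming scrape_text is async and returns (text, error); the placeholder scrape_text body and the _scrape_sync wrapper are illustrative, not part of this commit.

    import asyncio
    import multiprocessing

    async def scrape_text(url):
        # Placeholder standing in for the real scraper in api.py; assumed
        # to return a (text, error) tuple like the sequential version did.
        return f"text from {url}", ""

    def _scrape_sync(url):
        # Hypothetical module-level wrapper: runs the coroutine to
        # completion inside the worker process. Module-level functions
        # pickle by reference, which Pool.apply_async requires.
        return asyncio.run(scrape_text(url))

    def scrape_urls(urls):
        with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
            handles = [pool.apply_async(_scrape_sync, [url]) for url in urls]
            results = [h.get(timeout=120) for h in handles]  # up to 120 s per task
        scraped_texts = [t for t, _ in results]
        scrape_errors = [e for _, e in results]
        return scraped_texts, scrape_errors

    if __name__ == "__main__":
        print(scrape_urls(["https://example.com/a", "https://example.com/b"]))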
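
If the goal is only concurrency for I/O-bound scraping, an alternative worth noting is to keep the shape of the commented-out sequential version but let asyncio run all URLs concurrently, avoiding pickling and worker processes altogether. A hedged sketch under the same assumed async (text, error) contract:

    import asyncio

    async def scrape_text(url):
        # Same placeholder contract as the sketch above.
        return f"text from {url}", ""

    async def scrape_urls(urls):
        # Launch one scrape_text() coroutine per URL and await them
        # together; asyncio.gather() preserves the input order.
        results = await asyncio.gather(*(scrape_text(u) for u in urls))
        scraped_texts = [text for text, _ in results]
        scrape_errors = [err for _, err in results]
        return scraped_texts, scrape_errors

    if __name__ == "__main__":
        print(asyncio.run(scrape_urls(["https://example.com/a", "https://example.com/b"])))

A FastAPI endpoint can await such a coroutine directly, which is presumably why scrape_urls() was declared async in the first place.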