Update api.py
api.py CHANGED
@@ -58,36 +58,40 @@ async def summ_inference(txts: str):
     result = ["" if t=="" else inference_tokenizer.decode(p, skip_special_tokens=True).strip() for t, p in zip(txts, pred)]
     return result
 
-# def scrape_multi_process(urls):
-#     logging.warning('Entering get_news_multi_process() to extract new news articles')
-#     '''
-#     Get the data shape by parallely calculating lenght of each chunk and
-#     aggregating them to get lenght of complete training dataset
-#     '''
-#     pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
-
-#     results = []
-#     for url in urls:
-#         f = pool.apply_async(scrape_text, [url]) # asynchronously applying function to chunk. Each worker parallely begins to work on the job
-#         results.append(f) # appending result to results
-
-#     scraped_texts = []
-#     for f in results:
-#         scraped_texts.append(f.get(timeout=120))
-#     pool.close()
-#     pool.join()
-#     logging.warning('Exiting scrape_multi_process()')
-#     return scraped_texts
 
 async def scrape_urls(urls):
+    logging.warning('Entering scrape_urls()')
+    '''
+    Get the data shape by parallely calculating lenght of each chunk and
+    aggregating them to get lenght of complete training dataset
+    '''
+    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
+
+    results = []
+    for url in urls:
+        f = pool.apply_async(await scrape_text, [url]) # asynchronously applying function to chunk. Each worker parallely begins to work on the job
+        results.append(f) # appending result to results
+
     scraped_texts = []
     scrape_errors = []
-    for url in urls:
-        text, err = await scrape_text(url)
-        scraped_texts.append(text)
-        scrape_errors.append(err)
+    for f in results:
+        t, e = f.get(timeout=120)
+        scraped_texts.append(t)
+        scrape_errors.append(e)
+    pool.close()
+    pool.join()
+    logging.warning('Exiting scrape_urls()')
     return scraped_texts, scrape_errors
 
+# async def scrape_urls(urls):
+#     scraped_texts = []
+#     scrape_errors = []
+#     for url in urls:
+#         text, err = await scrape_text(url)
+#         scraped_texts.append(text)
+#         scrape_errors.append(err)
+#     return scraped_texts, scrape_errors
+
 ##### API #####
 app = FastAPI()
 summ_tokenizer, summ_model = load_summarizer_models()