Update api.py
api.py CHANGED
@@ -58,36 +58,40 @@ async def summ_inference(txts: str):
     result = ["" if t=="" else inference_tokenizer.decode(p, skip_special_tokens=True).strip() for t, p in zip(txts, pred)]
     return result
 
-# def scrape_multi_process(urls):
-#     logging.warning('Entering get_news_multi_process() to extract new news articles')
-#     '''
-#     Get the data shape by parallely calculating lenght of each chunk and
-#     aggregating them to get lenght of complete training dataset
-#     '''
-#     pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
-
-#     results = []
-#     for url in urls:
-#         f = pool.apply_async(scrape_text, [url]) # asynchronously applying function to chunk. Each worker parallely begins to work on the job
-#         results.append(f) # appending result to results
-
-#     scraped_texts = []
-#     for f in results:
-#         scraped_texts.append(f.get(timeout=120))
-#     pool.close()
-#     pool.join()
-#     logging.warning('Exiting scrape_multi_process()')
-#     return scraped_texts
 
 async def scrape_urls(urls):
+    logging.warning('Entering scrape_urls()')
+    '''
+    Get the data shape by parallely calculating lenght of each chunk and
+    aggregating them to get lenght of complete training dataset
+    '''
+    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
+
+    results = []
+    for url in urls:
+        f = pool.apply_async(await scrape_text, [url]) # asynchronously applying function to chunk. Each worker parallely begins to work on the job
+        results.append(f) # appending result to results
+
     scraped_texts = []
     scrape_errors = []
-    for url in urls:
-        text, err = await scrape_text(url)
-        scraped_texts.append(text)
-        scrape_errors.append(err)
+    for f in results:
+        t, e = f.get(timeout=120)
+        scraped_texts.append(t)
+        scrape_errors.append(e)
+    pool.close()
+    pool.join()
+    logging.warning('Exiting scrape_urls()')
     return scraped_texts, scrape_errors
 
+# async def scrape_urls(urls):
+#     scraped_texts = []
+#     scrape_errors = []
+#     for url in urls:
+#         text, err = await scrape_text(url)
+#         scraped_texts.append(text)
+#         scrape_errors.append(err)
+#     return scraped_texts, scrape_errors
+
 ##### API #####
 app = FastAPI()
 summ_tokenizer, summ_model = load_summarizer_models()