Reality123b committed on
Commit
e3fb49b
·
verified ·
1 Parent(s): 9a5e74c

Update application/utils/web_search.py

Browse files
Files changed (1) hide show
  1. application/utils/web_search.py +5 -5
application/utils/web_search.py CHANGED
@@ -1,7 +1,8 @@
1
- import requests
 
2
  from bs4 import BeautifulSoup
 
3
  import re
4
- from duckduckgo_search import DDGS
5
 
6
  class WebScarper:
7
  def __init__(self):
@@ -19,16 +20,15 @@ class WebScarper:
19
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
20
  }
21
  response = requests.get(url, headers=headers, timeout=10)
22
- response.raise_for_status()
23
  return response.text
24
  except requests.exceptions.RequestException as e:
25
  print(f"Error fetching URL {url}: {e}")
26
  return None
27
-
28
  def get_text(self, data):
29
  soup = BeautifulSoup(data, 'html.parser')
30
  text = soup.get_text()
31
- cleaned_text = re.sub(r'\s+', ' ', text).strip()
32
  return cleaned_text[:4000] if len(cleaned_text) > 4000 else cleaned_text
33
 
34
  def scarpe(self, query):
 
1
+ # application/utils/web_search.py
2
+ from duckduckgo_search import DDGS # Simpler import
3
  from bs4 import BeautifulSoup
4
+ import requests
5
  import re
 
6
 
7
  class WebScarper:
8
  def __init__(self):
 
20
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
21
  }
22
  response = requests.get(url, headers=headers, timeout=10)
23
+ response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
24
  return response.text
25
  except requests.exceptions.RequestException as e:
26
  print(f"Error fetching URL {url}: {e}")
27
  return None
 
28
  def get_text(self, data):
29
  soup = BeautifulSoup(data, 'html.parser')
30
  text = soup.get_text()
31
+ cleaned_text = re.sub(r'\s+', ' ', text).strip() # Remove extra whitespace
32
  return cleaned_text[:4000] if len(cleaned_text) > 4000 else cleaned_text
33
 
34
  def scarpe(self, query):