prithivMLmods committed on
Commit
4237af4
·
verified ·
1 Parent(s): 5517553

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -36
app.py CHANGED
@@ -4,6 +4,7 @@ import json
4
  from bs4 import BeautifulSoup
5
  import requests
6
 
 
7
  css = '''
8
  .gradio-container{max-width: 1000px !important}
9
  h1{text-align:center}
@@ -12,34 +13,35 @@ footer {
12
  }
13
  '''
14
 
15
- def extract_text_from_webpage(html_content):
 
16
  soup = BeautifulSoup(html_content, 'html.parser')
17
  for tag in soup(["script", "style", "header", "footer"]):
18
  tag.extract()
19
  return soup.get_text(strip=True)
20
 
21
- def search(query):
22
- term = query
 
23
  all_results = []
24
  max_chars_per_page = 8000
25
  with requests.Session() as session:
26
- resp = session.get(
27
  url="https://www.google.com/search",
28
- headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.0.0"},
29
- params={"q": term, "num": 3, "udm": 14},
30
  timeout=5,
31
- verify=None,
32
  )
33
- resp.raise_for_status()
34
- soup = BeautifulSoup(resp.text, "html.parser")
35
  result_block = soup.find_all("div", attrs={"class": "g"})
36
  for result in result_block:
37
- link = result.find("a", href=True)
38
- link = link["href"]
39
  try:
40
- webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.0.0"}, timeout=5, verify=False)
41
- webpage.raise_for_status()
42
- visible_text = extract_text_from_webpage(webpage.text)
43
  if len(visible_text) > max_chars_per_page:
44
  visible_text = visible_text[:max_chars_per_page]
45
  all_results.append({"link": link, "text": visible_text})
@@ -47,36 +49,33 @@ def search(query):
47
  all_results.append({"link": link, "text": None})
48
  return all_results
49
 
 
50
  client_gemma = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
51
  client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
52
  client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
53
 
54
- func_caller = []
55
-
56
- def respond(message, history):
57
- func_caller = []
58
 
59
  user_prompt = message
60
  functions_metadata = [
61
- {"type": "function", "function": {"name": "web_search", "description": "Search query on google", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "web search query"}}, "required": ["query"]}}},
62
  ]
63
 
64
  for msg in history:
65
- func_caller.append({"role": "user", "content": f"{str(msg[0])}"})
66
- func_caller.append({"role": "assistant", "content": f"{str(msg[1])}"})
67
 
68
- func_caller.append({"role": "user", "content": f'[SYSTEM]You are a helpful assistant. You have access to the following functions: \n {str(functions_metadata)}\n\nTo use these functions respond with:\n<functioncall> {{ "name": "function_name", "arguments": {{ "arg_1": "value_1", "arg_1": "value_1", ... }} }} </functioncall> [USER] {message}'})
69
 
70
- response = client_gemma.chat_completion(func_caller, max_tokens=200)
71
  response = str(response)
72
  try:
73
  response = response[int(response.find("{")):int(response.rindex("}"))+1]
74
  except:
75
  response = response[int(response.find("{")):(int(response.rfind("}"))+1)]
76
- response = response.replace("\\n", "")
77
- response = response.replace("\\'", "'")
78
- response = response.replace('\\"', '"')
79
- response = response.replace('\\', '')
80
  print(f"\n{response}")
81
 
82
  try:
@@ -84,14 +83,14 @@ def respond(message, history):
84
  if json_data["name"] == "web_search":
85
  query = json_data["arguments"]["query"]
86
  gr.Info("Searching Web")
87
- web_results = search(query)
88
  gr.Info("Extracting relevant Info")
89
- web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results if res['text']])
90
- messages = f"system\nWeb Dac uses the user agents of Mozilla, AppleWebKit, and Safari browsers for chat responses and human context mimicking."
91
  for msg in history:
92
  messages += f"\nuser\n{str(msg[0])}"
93
  messages += f"\nassistant\n{str(msg[1])}"
94
- messages+=f"\nuser\n{message}\nweb_result\n{web2}\nassistant\n"
95
  stream = client_mixtral.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
96
  output = ""
97
  for response in stream:
@@ -99,11 +98,11 @@ def respond(message, history):
99
  output += response.token.text
100
  yield output
101
  else:
102
- messages = f"system\nWeb Dac uses the user agents of Mozilla, AppleWebKit, and Safari browsers for chat responses and human context mimicking."
103
  for msg in history:
104
  messages += f"\nuser\n{str(msg[0])}"
105
  messages += f"\nassistant\n{str(msg[1])}"
106
- messages+=f"\nuser\n{message}\nassistant\n"
107
  stream = client_llama.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
108
  output = ""
109
  for response in stream:
@@ -111,11 +110,11 @@ def respond(message, history):
111
  output += response.token.text
112
  yield output
113
  except:
114
- messages = f"system\nWeb Dac uses the user agents of Mozilla, AppleWebKit, and Safari browsers for chat responses and human context mimicking."
115
  for msg in history:
116
  messages += f"\nuser\n{str(msg[0])}"
117
  messages += f"\nassistant\n{str(msg[1])}"
118
- messages+=f"\nuser\n{message}\nassistant\n"
119
  stream = client_llama.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
120
  output = ""
121
  for response in stream:
@@ -123,8 +122,9 @@ def respond(message, history):
123
  output += response.token.text
124
  yield output
125
 
 
126
  demo = gr.ChatInterface(
127
- fn=respond,
128
  chatbot=gr.Chatbot(),
129
  description=" ",
130
  textbox=gr.Textbox(),
@@ -133,4 +133,5 @@ demo = gr.ChatInterface(
133
  css=css,
134
  theme="allenai/gradio-theme",
135
  )
 
136
  demo.launch(share=True)
 
4
  from bs4 import BeautifulSoup
5
  import requests
6
 
7
+ # Custom CSS for Gradio app
8
  css = '''
9
  .gradio-container{max-width: 1000px !important}
10
  h1{text-align:center}
 
13
  }
14
  '''
15
 
16
+ # Function to extract text from a webpage
17
+ def get_text_from_html(html_content):
18
  soup = BeautifulSoup(html_content, 'html.parser')
19
  for tag in soup(["script", "style", "header", "footer"]):
20
  tag.extract()
21
  return soup.get_text(strip=True)
22
 
23
+ # Function to perform a web search
24
+ def perform_search(query):
25
+ search_term = query
26
  all_results = []
27
  max_chars_per_page = 8000
28
  with requests.Session() as session:
29
+ response = session.get(
30
  url="https://www.google.com/search",
31
+ headers={"User-Agent": "Mozilla/5.0"},
32
+ params={"q": search_term, "num": 3, "udm": 14},
33
  timeout=5,
34
+ verify=False,
35
  )
36
+ response.raise_for_status()
37
+ soup = BeautifulSoup(response.text, "html.parser")
38
  result_block = soup.find_all("div", attrs={"class": "g"})
39
  for result in result_block:
40
+ link = result.find("a", href=True)["href"]
 
41
  try:
42
+ webpage_response = session.get(link, headers={"User-Agent": "Mozilla/5.0"}, timeout=5, verify=False)
43
+ webpage_response.raise_for_status()
44
+ visible_text = get_text_from_html(webpage_response.text)
45
  if len(visible_text) > max_chars_per_page:
46
  visible_text = visible_text[:max_chars_per_page]
47
  all_results.append({"link": link, "text": visible_text})
 
49
  all_results.append({"link": link, "text": None})
50
  return all_results
51
 
52
+ # Initialize inference clients
53
  client_gemma = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
54
  client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
55
  client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
56
 
57
+ # Function to handle responses
58
+ def chat_response(message, history):
59
+ func_calls = []
 
60
 
61
  user_prompt = message
62
  functions_metadata = [
63
+ {"type": "function", "function": {"name": "web_search", "description": "Search query on Google", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Web search query"}}, "required": ["query"]}}},
64
  ]
65
 
66
  for msg in history:
67
+ func_calls.append({"role": "user", "content": f"{str(msg[0])}"})
68
+ func_calls.append({"role": "assistant", "content": f"{str(msg[1])}"})
69
 
70
+ func_calls.append({"role": "user", "content": f'[SYSTEM] You are a helpful assistant. You have access to the following functions: \n {str(functions_metadata)}\n\nTo use these functions respond with:\n<functioncall> {{ "name": "function_name", "arguments": {{ "arg_1": "value_1", "arg_1": "value_1", ... }} }} </functioncall> [USER] {message}'})
71
 
72
+ response = client_gemma.chat_completion(func_calls, max_tokens=200)
73
  response = str(response)
74
  try:
75
  response = response[int(response.find("{")):int(response.rindex("}"))+1]
76
  except:
77
  response = response[int(response.find("{")):(int(response.rfind("}"))+1)]
78
+ response = response.replace("\\n", "").replace("\\'", "'").replace('\\"', '"').replace('\\', '')
 
 
 
79
  print(f"\n{response}")
80
 
81
  try:
 
83
  if json_data["name"] == "web_search":
84
  query = json_data["arguments"]["query"]
85
  gr.Info("Searching Web")
86
+ web_results = perform_search(query)
87
  gr.Info("Extracting relevant Info")
88
+ web_summary = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results if res['text']])
89
+ messages = "system\nWeb Dac uses the user agents of Mozilla, AppleWebKit, and Safari browsers for chat responses and human context mimicking."
90
  for msg in history:
91
  messages += f"\nuser\n{str(msg[0])}"
92
  messages += f"\nassistant\n{str(msg[1])}"
93
+ messages += f"\nuser\n{message}\nweb_result\n{web_summary}\nassistant\n"
94
  stream = client_mixtral.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
95
  output = ""
96
  for response in stream:
 
98
  output += response.token.text
99
  yield output
100
  else:
101
+ messages = "system\nWeb Dac uses the user agents of Mozilla, AppleWebKit, and Safari browsers for chat responses and human context mimicking."
102
  for msg in history:
103
  messages += f"\nuser\n{str(msg[0])}"
104
  messages += f"\nassistant\n{str(msg[1])}"
105
+ messages += f"\nuser\n{message}\nassistant\n"
106
  stream = client_llama.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
107
  output = ""
108
  for response in stream:
 
110
  output += response.token.text
111
  yield output
112
  except:
113
+ messages = "system\nWeb Dac uses the user agents of Mozilla, AppleWebKit, and Safari browsers for chat responses and human context mimicking."
114
  for msg in history:
115
  messages += f"\nuser\n{str(msg[0])}"
116
  messages += f"\nassistant\n{str(msg[1])}"
117
+ messages += f"\nuser\n{message}\nassistant\n"
118
  stream = client_llama.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
119
  output = ""
120
  for response in stream:
 
122
  output += response.token.text
123
  yield output
124
 
125
+ # Create Gradio interface
126
  demo = gr.ChatInterface(
127
+ fn=chat_response,
128
  chatbot=gr.Chatbot(),
129
  description=" ",
130
  textbox=gr.Textbox(),
 
133
  css=css,
134
  theme="allenai/gradio-theme",
135
  )
136
+
137
  demo.launch(share=True)