Spaces:
Running
Running
Update app.py
Browse files
Updating docstrings for better deployment by the agent, and adding a function to scan through paragraphs from websites.
app.py
CHANGED
@@ -25,7 +25,8 @@ def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return
|
|
25 |
@tool
|
26 |
def webpage_contents_get(url:str, headers_in: dict = None)-> str: #it's important to specify the return type
|
27 |
#Keep this format for the description / args / args description but feel free to modify the tool
|
28 |
-
"""A simple function to grab contents of a webpage
|
|
|
29 |
Args:
|
30 |
url: The URL the contents of which the tool will get
|
31 |
headers_in: A dictionary which defines the headers for the request. Defaults to None
|
@@ -44,6 +45,21 @@ def webpage_header_get(url:str)-> str: #it's import to specify the return type
|
|
44 |
response = requests.get(url)
|
45 |
return response.headers
|
46 |
@tool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
def webpage_contents_soup_links(response_content:bytes)->list:
|
48 |
'''
|
49 |
This function will find all links in the response.content from the webpage_contents_get tool
|
@@ -54,8 +70,7 @@ def webpage_contents_soup_links(response_content:bytes)->list:
|
|
54 |
list_ret
|
55 |
for link in soup.find_all("a"):
|
56 |
ret_t = link.get("href")
|
57 |
-
|
58 |
-
list_ret.append(link.get("href"))
|
59 |
return list_ret
|
60 |
|
61 |
|
|
|
25 |
@tool
|
26 |
def webpage_contents_get(url:str, headers_in: dict = None)-> str: #it's important to specify the return type
|
27 |
#Keep this format for the description / args / args description but feel free to modify the tool
|
28 |
+
"""A simple function to grab contents of a webpage. As this is very long and not easily interpretable,
|
29 |
+
summaries based on other tools applied to this content should be returned to users with questions.
|
30 |
Args:
|
31 |
url: The URL the contents of which the tool will get
|
32 |
headers_in: A dictionary which defines the headers for the request. Defaults to None
|
|
|
45 |
response = requests.get(url)
|
46 |
return response.headers
|
47 |
@tool
def webpage_contents_soup_paragraphs(response_content: bytes) -> list:
    '''
    This function will find all paragraphs in the response.content from the webpage_contents_get tool.
    Since this can be long, it is best to search this text for more concise summaries to return.

    Args:
        response_content: response.content value returned by webpage_contents_get tool.
    '''
    # Bug fix: original parsed `response.content`, but `response` is not defined in
    # this scope — the parameter is `response_content`.
    soup = BeautifulSoup(response_content, "html.parser")
    # Bug fix: original had a bare `list_ret` expression (NameError on first append);
    # the accumulator must be initialized to an empty list.
    list_ret = []
    # Collect the visible text of every <p> element, in document order.
    for paragraph in soup.find_all("p"):
        list_ret.append(paragraph.text)
    return list_ret
|
62 |
+
@tool
|
63 |
def webpage_contents_soup_links(response_content:bytes)->list:
|
64 |
'''
|
65 |
This function will find all links in the response.content from the webpage_contents_get tool
|
|
|
70 |
list_ret
|
71 |
for link in soup.find_all("a"):
|
72 |
ret_t = link.get("href")
|
73 |
+
list_ret.append(ret_t)
|
|
|
74 |
return list_ret
|
75 |
|
76 |
|