adithya747 committed on
Commit
90a9f3d
·
verified ·
1 Parent(s): 4a1e457

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -43
app.py CHANGED
@@ -3,23 +3,20 @@ import requests
3
  from bs4 import BeautifulSoup
4
  from transformers import pipeline
5
 
6
- # Use a more lightweight model for Hugging Face Spaces; the pipeline is built once at import and reused by summarize_website
7
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
 
9
def scrape_website(url):
    """Extract the readable text of a web page, reporting failures as text.

    Args:
        url: Full address of the page to fetch, including the scheme.

    Returns:
        The text of the page's content elements joined with single spaces,
        "No content found" when those elements hold no text, or a message
        starting with "Scraping Error:" when fetching or parsing fails.
        This function never raises.
    """
    content_tags = ['p', 'article', 'main', 'section']
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}  # Add headers to prevent 403 errors
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise HTTP errors

        soup = BeautifulSoup(response.text, "html.parser")

        # Extract text from common content-containing tags, keeping only the
        # outermost matches: a <p> nested in an <article> is already covered
        # by the article's text, so collecting both would duplicate it.
        text_elements = [
            element
            for element in soup.find_all(content_tags)
            if element.find_parent(content_tags) is None
        ]
        text = " ".join(
            element.get_text(strip=True, separator=' ')
            for element in text_elements
        )

        return text if text.strip() else "No content found"

    except Exception as e:  # boundary for the UI: surface the failure as text
        return f"Scraping Error: {e}"
@@ -27,44 +24,97 @@ def scrape_website(url):
27
def summarize_website(url):
    """Scrape a page and return a Markdown summary of its text.

    Args:
        url: Address of the page to summarize.

    Returns:
        A Markdown string holding the summary, or a message explaining why
        no summary was produced (scrape failure, too little text, or a
        summarization failure). This function never raises.
    """
    try:
        extracted_text = scrape_website(url)

        # scrape_website reports failures as text with this prefix. Matching
        # the prefix (not any "Error" substring) keeps pages that merely
        # mention the word "Error" from being rejected as failures.
        if extracted_text.startswith("Scraping Error:"):
            return extracted_text

        # Check minimum text length
        if len(extracted_text.split()) < 50:
            return "Error: Insufficient content for summarization (minimum 50 words required)"

        # Cap the input by characters (not tokens); truncation=True below
        # additionally lets the pipeline clip to the model's own input limit.
        max_input_length = 1000
        truncated_text = extracted_text[:max_input_length]

        # Generate summary
        summary = summarizer(
            truncated_text,
            max_length=200,
            min_length=50,
            do_sample=False,
            truncation=True,
        )

        return f"**Summary:**\n\n{summary[0]['summary_text']}"

    except Exception as e:  # boundary for the UI: surface the failure as text
        return f"Summarization Error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
# Single-input Gradio UI: a URL textbox in, rendered Markdown out.
_url_box = gr.Textbox(
    label="Website URL",
    placeholder="Enter full URL (including https://)...",
)
_summary_view = gr.Markdown()

# Sample URLs offered as one-click examples in the interface.
_example_urls = [
    ["https://en.wikipedia.org/wiki/Large_language_model"],
    ["https://www.bbc.com/news/technology-66510295"],
]

iface = gr.Interface(
    fn=summarize_website,
    inputs=_url_box,
    outputs=_summary_view,
    title="AI-Powered Website Summarizer",
    description="Enter a website URL to get an AI-generated summary of its content",
    examples=_example_urls,
)

iface.launch()
 
 
 
 
 
 
3
  from bs4 import BeautifulSoup
4
  from transformers import pipeline
5
 
6
+ # Load the summarization pipeline once at import; summarize_website reuses this module-level instance
7
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
 
9
def scrape_website(url):
    """Fetch a web page and return its readable text.

    Args:
        url: Full address of the page to fetch, including the scheme.

    Returns:
        The stripped text of the page's content elements joined with single
        spaces, "No content found" when those elements hold no text, or a
        message starting with "Scraping Error:" when fetching or parsing
        fails. This function never raises.
    """
    content_tags = ['p', 'article', 'main', 'section']
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Keep only the outermost matches. A <p> inside an <article> is
        # already part of the article's text, so collecting both would
        # repeat the same sentences in the result.
        outermost = [
            element
            for element in soup.find_all(content_tags)
            if element.find_parent(content_tags) is None
        ]
        text = " ".join(
            element.get_text(strip=True, separator=' ')
            for element in outermost
        ).strip()
        return text or "No content found"

    except Exception as e:  # boundary for the UI: surface the failure as text
        return f"Scraping Error: {e}"
 
24
def summarize_website(url):
    """Scrape a page and return a Markdown summary of its text.

    Args:
        url: Address of the page to summarize.

    Returns:
        A Markdown string: the summary under a "Summary" heading, or a
        message explaining why no summary was produced (scrape failure,
        too little text, or a summarization failure). This function never
        raises.
    """
    try:
        # No layout components are built here: the handler's only output is
        # the returned Markdown string.
        extracted_text = scrape_website(url)

        # scrape_website reports failures as text with this prefix. Matching
        # the prefix (not any "Error" substring) keeps pages that merely
        # mention the word "Error" from being rejected as failures.
        if extracted_text.startswith("Scraping Error:"):
            return f"❌ {extracted_text}"

        if len(extracted_text.split()) < 50:
            return "⚠️ Error: Insufficient content for summarization (minimum 50 words required)"

        # Cap the input by characters (not tokens); truncation=True below
        # additionally lets the pipeline clip to the model's own input limit.
        max_input_length = 1000
        truncated_text = extracted_text[:max_input_length]

        summary = summarizer(
            truncated_text,
            max_length=200,
            min_length=50,
            do_sample=False,
            truncation=True,
        )

        return f"## 📝 Summary\n\n{summary[0]['summary_text']}"

    except Exception as e:  # boundary for the UI: surface the failure as text
        return f"⛔ Summarization Error: {e}"
53
+
54
# Custom CSS for small screens. The URL textbox is targeted by id because it
# is created with elem_id="input-box"; both textarea and input are listed so
# the rule applies whichever element the textbox renders as.
css = """
@media screen and (max-width: 600px) {
    .container {
        padding: 10px !important;
    }
    #input-box textarea,
    #input-box input {
        font-size: 16px !important;
    }
}
"""

# Mobile-optimized interface with Blocks API
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as app:
    gr.Markdown("# 🌐 AI Website Summarizer")
    gr.Markdown("Paste any website URL below to get an instant AI-powered summary!")

    with gr.Row():
        url_input = gr.Textbox(
            label="Website URL",
            placeholder="Enter full URL (https://...)",
            lines=1,
            max_lines=1,
            elem_id="input-box",
        )

    with gr.Row():
        submit_btn = gr.Button("Generate Summary 🚀", variant="primary")
        clear_btn = gr.Button("Clear 🔄")

    output = gr.Markdown()

    # Example section
    gr.Examples(
        examples=[
            ["https://en.wikipedia.org/wiki/Large_language_model"],
            ["https://www.bbc.com/news/technology-66510295"],
        ],
        inputs=url_input,
        label="Try these examples:",
        examples_per_page=2,
    )

    # Event handlers
    # Run the summarizer on click and render its Markdown result.
    submit_btn.click(
        fn=summarize_website,
        inputs=url_input,
        outputs=output,
        api_name="summarize",
    )

    # Reset both the URL field and the rendered summary to empty strings.
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[url_input, output],
        queue=False,
    )

# Mobile-friendly configuration: listen on all interfaces on port 7860.
# No favicon_path is passed: that option expects a local file, and the remote
# URL used previously could not be served, so Gradio's default icon is used.
app.launch(
    server_name="0.0.0.0",
    server_port=7860,
)