fikird committed on
Commit
12d42ff
·
1 Parent(s): a23c67c

Add OSINT capabilities and advanced search features

Browse files
Files changed (4) hide show
  1. app.py +157 -37
  2. osint_engine.py +307 -0
  3. requirements.txt +18 -0
  4. search_engine.py +64 -0
app.py CHANGED
@@ -1,11 +1,15 @@
1
  import gradio as gr
2
- from search_engine import search
 
 
3
 
4
- def safe_search(query, max_results=5):
5
- try:
6
- results = search(query, max_results)
7
- formatted_results = []
8
 
 
 
 
9
  for result in results:
10
  formatted_result = f"""
11
  ### [{result['title']}]({result['url']})
@@ -16,42 +20,158 @@ def safe_search(query, max_results=5):
16
  **Published:** {result.get('published_date', 'N/A')}
17
  """
18
  formatted_results.append(formatted_result)
19
-
20
  return "\n---\n".join(formatted_results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  except Exception as e:
22
  return f"Error: {str(e)}"
23
 
24
  # Create Gradio interface
25
- demo = gr.Interface(
26
- fn=safe_search,
27
- inputs=[
28
- gr.Textbox(
29
- label="Search Query",
30
- placeholder="Enter your search query...",
31
- lines=2
32
- ),
33
- gr.Slider(
34
- minimum=1,
35
- maximum=10,
36
- value=5,
37
- step=1,
38
- label="Number of Results"
39
- )
40
- ],
41
- outputs=gr.Markdown(label="Search Results"),
42
- title="🔍 Intelligent Search Engine",
43
- description="""
44
- An AI-powered search engine that provides intelligent summaries and insights from web content.
45
 
46
  Features:
47
- - Smart content summarization
48
- - Semantic search capabilities
49
- - Clean, readable results
50
- """,
51
- examples=[
52
- ["Latest developments in artificial intelligence", 3],
53
- ["Climate change solutions", 5],
54
- ["Space exploration news", 4]
55
- ],
56
- theme=gr.themes.Soft()
57
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import asyncio
3
+ from search_engine import search, advanced_search
4
+ from osint_engine import create_report
5
 
6
+ def format_results(results):
7
+ if not results:
8
+ return "No results found."
 
9
 
10
+ if isinstance(results, list):
11
+ # Format web search results
12
+ formatted_results = []
13
  for result in results:
14
  formatted_result = f"""
15
  ### [{result['title']}]({result['url']})
 
20
  **Published:** {result.get('published_date', 'N/A')}
21
  """
22
  formatted_results.append(formatted_result)
 
23
  return "\n---\n".join(formatted_results)
24
+ elif isinstance(results, dict):
25
+ # Format OSINT results
26
+ return create_report(results)
27
+ else:
28
+ return str(results)
29
+
30
def safe_search(query, search_type="web", max_results=5, platform=None,
                image_url=None, phone=None, location=None, domain=None):
    """Run a search of the requested type and return display-ready text.

    Args:
        query: Search text, username or URL, depending on ``search_type``.
        search_type: ``"web"`` uses the synchronous ``search``; any other
            value is forwarded to the asynchronous ``advanced_search``.
        max_results: Maximum number of web results.
        platform: Social platform name; ``"all"`` (the UI default) and
            ``None`` both mean "no specific platform".
        image_url: For ``search_type == "image"``, replaces ``query`` when
            non-empty.
        phone, location, domain: Optional inputs for the personal search.

    Returns:
        The output of ``format_results`` on success, or ``"Error: <message>"``
        if anything raises.
    """
    try:
        if search_type == "web":
            results = search(query, max_results)
        else:
            if search_type == "image" and image_url:
                query = image_url

            options = {
                "max_results": max_results,
                # "all" is a UI sentinel, not a real platform name.
                "platform": None if platform == "all" else platform,
                "phone": phone,
                "location": location,
                "domain": domain,
            }
            # Drop unset/blank fields so downstream code that checks for key
            # presence does not try to analyse empty values.
            kwargs = {k: v for k, v in options.items() if v not in (None, "")}

            # asyncio.run creates a fresh loop and always closes it, even when
            # the coroutine raises, so no loop is leaked or left installed.
            results = asyncio.run(advanced_search(query, search_type, **kwargs))

        return format_results(results)
    except Exception as e:
        return f"Error: {str(e)}"
56
 
57
  # Create Gradio interface
58
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
59
+ gr.Markdown("# 🔍 Intelligent Search Engine")
60
+ gr.Markdown("""
61
+ An AI-powered search engine with advanced OSINT capabilities.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  Features:
64
+ - Web search with AI summaries
65
+ - Username search across platforms
66
+ - Image search and analysis
67
+ - Social media profile search
68
+ - Personal information gathering
69
+ - Historical data search
70
+ """)
71
+
72
+ with gr.Tab("Web Search"):
73
+ with gr.Row():
74
+ query_input = gr.Textbox(
75
+ label="Search Query",
76
+ placeholder="Enter your search query...",
77
+ lines=2
78
+ )
79
+ max_results = gr.Slider(
80
+ minimum=1,
81
+ maximum=10,
82
+ value=5,
83
+ step=1,
84
+ label="Number of Results"
85
+ )
86
+ search_button = gr.Button("Search")
87
+ results_output = gr.Markdown(label="Search Results")
88
+ search_button.click(
89
+ fn=lambda q, n: safe_search(q, "web", n),
90
+ inputs=[query_input, max_results],
91
+ outputs=results_output
92
+ )
93
+
94
+ with gr.Tab("Username Search"):
95
+ username_input = gr.Textbox(
96
+ label="Username",
97
+ placeholder="Enter username to search..."
98
+ )
99
+ username_button = gr.Button("Search Username")
100
+ username_output = gr.Markdown(label="Username Search Results")
101
+ username_button.click(
102
+ fn=lambda u: safe_search(u, "username"),
103
+ inputs=username_input,
104
+ outputs=username_output
105
+ )
106
+
107
+ with gr.Tab("Image Search"):
108
+ image_url = gr.Textbox(
109
+ label="Image URL",
110
+ placeholder="Enter image URL to search..."
111
+ )
112
+ image_button = gr.Button("Search Image")
113
+ image_output = gr.Markdown(label="Image Search Results")
114
+ image_button.click(
115
+ fn=lambda u: safe_search(u, "image", image_url=u),
116
+ inputs=image_url,
117
+ outputs=image_output
118
+ )
119
+
120
+ with gr.Tab("Social Media Search"):
121
+ with gr.Row():
122
+ social_username = gr.Textbox(
123
+ label="Username",
124
+ placeholder="Enter username..."
125
+ )
126
+ platform = gr.Dropdown(
127
+ choices=["all", "instagram", "twitter", "reddit"],
128
+ value="all",
129
+ label="Platform"
130
+ )
131
+ social_button = gr.Button("Search Social Media")
132
+ social_output = gr.Markdown(label="Social Media Results")
133
+ social_button.click(
134
+ fn=lambda u, p: safe_search(u, "social", platform=p),
135
+ inputs=[social_username, platform],
136
+ outputs=social_output
137
+ )
138
+
139
+ with gr.Tab("Personal Info"):
140
+ with gr.Row():
141
+ phone = gr.Textbox(label="Phone Number", placeholder="+1234567890")
142
+ location = gr.Textbox(label="Location", placeholder="City, Country")
143
+ domain = gr.Textbox(label="Domain", placeholder="example.com")
144
+ personal_button = gr.Button("Gather Information")
145
+ personal_output = gr.Markdown(label="Personal Information Results")
146
+ personal_button.click(
147
+ fn=lambda p, l, d: safe_search("", "personal", phone=p, location=l, domain=d),
148
+ inputs=[phone, location, domain],
149
+ outputs=personal_output
150
+ )
151
+
152
+ with gr.Tab("Historical Data"):
153
+ url_input = gr.Textbox(
154
+ label="URL",
155
+ placeholder="Enter URL to search historical data..."
156
+ )
157
+ historical_button = gr.Button("Search Historical Data")
158
+ historical_output = gr.Markdown(label="Historical Data Results")
159
+ historical_button.click(
160
+ fn=lambda u: safe_search(u, "historical"),
161
+ inputs=url_input,
162
+ outputs=historical_output
163
+ )
164
+
165
+ gr.Markdown("""
166
+ ### Examples
167
+ Try these example searches:
168
+ - Web Search: "Latest developments in artificial intelligence"
169
+ - Username: "johndoe"
170
+ - Image URL: "https://example.com/image.jpg"
171
+ - Social Media: "techuser" on Twitter
172
+ - Historical Data: "example.com"
173
+ """)
174
+
175
+ # Launch the app
176
+ if __name__ == "__main__":
177
+ demo.launch()
osint_engine.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import time
5
+ import asyncio
6
+ import aiohttp
7
+ import requests
8
+ import instaloader
9
+ import face_recognition
10
+ import numpy as np
11
+ from PIL import Image
12
+ from io import BytesIO
13
+ from typing import Dict, List, Any, Union
14
+ from selenium import webdriver
15
+ from selenium.webdriver.chrome.options import Options
16
+ from selenium.webdriver.chrome.service import Service
17
+ from webdriver_manager.chrome import ChromeDriverManager
18
+ from holehe.core import *
19
+ from sherlock import sherlock
20
+ from geopy.geocoders import Nominatim
21
+ from waybackpy import WaybackMachineCDXServerAPI
22
+ import phonenumbers
23
+ from phonenumbers import geocoder, carrier, timezone
24
+ import whois
25
+ from datetime import datetime
26
+
27
class OSINTEngine:
    """OSINT capabilities for advanced information gathering.

    Groups username, e-mail, image, phone, location, domain, social-media
    and web-archive lookups. Most lookups report a failure as an ``'error'``
    entry in their result instead of raising.
    """

    # Seconds allowed for any single outbound HTTP request.
    REQUEST_TIMEOUT = 15
    # Platforms queried when no specific platform (or "all") is requested.
    SOCIAL_PLATFORMS = ('instagram', 'twitter', 'reddit')

    def __init__(self):
        """Configure headless Chrome options and create the API clients."""
        self.chrome_options = Options()
        self.chrome_options.add_argument('--headless')
        self.chrome_options.add_argument('--no-sandbox')
        self.chrome_options.add_argument('--disable-dev-shm-usage')
        self.setup_apis()

    def setup_apis(self):
        """Initialize API clients"""
        self.instagram = instaloader.Instaloader()
        self.geolocator = Nominatim(user_agent="intelligent_search")

    async def search_username(self, username: str) -> Dict[str, Any]:
        """Search for a username across platforms and probe a guessed e-mail.

        Returns a dict with ``'platforms'`` (hits from ``sherlock_search``)
        and ``'email'`` (result of ``search_email`` for
        ``<username>@gmail.com``).
        """
        results = {}
        results['platforms'] = await self.sherlock_search(username)
        # The address is a guess derived from the username, not a known one.
        results['email'] = await self.search_email(f"{username}@gmail.com")
        return results

    async def sherlock_search(self, username: str) -> List[Dict[str, str]]:
        """Check ``username`` on every site in the Sherlock site list.

        Returns one dict per site that answered HTTP 200.
        """
        # NOTE(review): assumes sherlock.site_data() returns a mapping of
        # site name -> dict with a 'url' template; confirm against the
        # installed sherlock package.
        sites = sherlock.site_data()

        async with aiohttp.ClientSession() as session:
            tasks = [
                self.check_username(session, username, site_name, site_data)
                for site_name, site_data in sites.items()
            ]
            results = await asyncio.gather(*tasks)
        return [r for r in results if r is not None]

    async def check_username(self, session, username: str, site_name: str,
                             site_data: Dict) -> Union[Dict[str, str], None]:
        """Check a username on one platform.

        Returns a ``{'platform', 'url', 'found'}`` dict when the profile URL
        answers HTTP 200, otherwise ``None`` (including on any request or
        template error; this probe is deliberately best-effort).
        """
        template = site_data.get('url', '')
        if not template:
            return None

        try:
            # Accept both "{}" and "{username}" style templates.
            url = template.format(username, username=username)
        except (IndexError, KeyError, ValueError):
            return None

        try:
            timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT)
            async with session.get(url, timeout=timeout) as response:
                if response.status == 200:
                    return {
                        'platform': site_name,
                        'url': url,
                        'found': True
                    }
        except Exception:
            # Best-effort: an unreachable site simply counts as "not found".
            # Exception (not a bare except) lets task cancellation propagate.
            return None
        return None

    async def search_email(self, email: str) -> Dict[str, Any]:
        """Search for e-mail presence on various platforms.

        Returns a dict keyed by checker name for every checker that produced
        a truthy result; checkers that raise are skipped.
        """
        results = {}
        # NOTE(review): get_functions comes from the star import of
        # holehe.core; confirm that it takes no arguments and that each
        # returned checker is awaitable with just the e-mail address.
        modules = get_functions()

        for module in modules:
            try:
                out = await module(email)
                if out:
                    results[module.__name__] = out
            except Exception:
                continue

        return results

    async def search_image(self, image_url: str) -> Dict[str, Any]:
        """Download an image, detect faces and run a reverse image search.

        Returns ``'faces_found'``, ``'face_locations'`` and
        ``'reverse_search'``, or an ``'error'`` entry if any step fails.
        """
        results = {}

        try:
            response = requests.get(image_url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            # Normalise palette/alpha/greyscale images to 8-bit RGB before
            # handing the array to the face detector.
            img = Image.open(BytesIO(response.content)).convert('RGB')

            img_array = np.array(img)
            face_locations = face_recognition.face_locations(img_array)

            results['faces_found'] = len(face_locations)
            results['face_locations'] = face_locations

            results['reverse_search'] = await self.reverse_image_search(image_url)

        except Exception as e:
            results['error'] = str(e)

        return results

    async def reverse_image_search(self, image_url: str) -> List[Dict[str, str]]:
        """Open Google Lens for ``image_url`` in headless Chrome.

        Returns a one-element list holding either the resulting Lens URL or
        an ``'error'`` entry.
        """
        from urllib.parse import quote

        results = []
        driver = None

        try:
            driver = webdriver.Chrome(
                service=Service(ChromeDriverManager().install()),
                options=self.chrome_options
            )

            # Percent-encode the target so its own query string cannot be
            # mistaken for parameters of the Lens URL.
            search_url = (
                "https://lens.google.com/uploadbyurl?url="
                f"{quote(image_url, safe='')}"
            )
            driver.get(search_url)
            # Give the page time to redirect without blocking the event loop.
            await asyncio.sleep(3)

            # Extract results (simplified)
            results.append({
                'source': 'Google Lens',
                'url': driver.current_url
            })

        except Exception as e:
            results.append({'error': str(e)})
        finally:
            # Always release the browser process, even after a failure.
            if driver is not None:
                driver.quit()

        return results

    async def gather_personal_info(self, data: Dict[str, str]) -> Dict[str, Any]:
        """Analyse whichever of ``phone``, ``location``, ``domain`` are given.

        A field is analysed only when its value is non-empty, so callers may
        pass every key with ``None`` or ``""`` for the ones left blank.
        """
        results = {}

        phone = data.get('phone')
        if phone:
            results['phone'] = self.analyze_phone_number(phone)

        location = data.get('location')
        if location:
            results['location'] = await self.analyze_location(location)

        domain = data.get('domain')
        if domain:
            results['domain'] = self.analyze_domain(domain)

        return results

    def analyze_phone_number(self, phone: str) -> Dict[str, Any]:
        """Parse a phone number and report validity, type, region, carrier
        and time zones, or an ``'error'`` entry if parsing fails."""
        try:
            number = phonenumbers.parse(phone)
            return {
                'valid': phonenumbers.is_valid_number(number),
                'type': phonenumbers.number_type(number),
                'country': geocoder.description_for_number(number, "en"),
                'carrier': carrier.name_for_number(number, "en"),
                'timezone': timezone.time_zones_for_number(number)
            }
        except Exception as e:
            return {'error': str(e)}

    async def analyze_location(self, location: str) -> Union[Dict[str, Any], None]:
        """Geocode ``location``.

        Returns address/latitude/longitude/raw data, ``None`` when the
        geocoder finds no match, or an ``'error'`` entry on failure.
        """
        try:
            location_data = self.geolocator.geocode(location)
            if location_data:
                return {
                    'address': location_data.address,
                    'latitude': location_data.latitude,
                    'longitude': location_data.longitude,
                    'raw': location_data.raw
                }
        except Exception as e:
            return {'error': str(e)}
        return None

    def analyze_domain(self, domain: str) -> Dict[str, Any]:
        """Look up WHOIS data for ``domain`` and return selected fields, or
        an ``'error'`` entry if the lookup fails."""
        try:
            domain_info = whois.whois(domain)
            return {
                'registrar': domain_info.registrar,
                'creation_date': domain_info.creation_date,
                'expiration_date': domain_info.expiration_date,
                'last_updated': domain_info.updated_date,
                'status': domain_info.status
            }
        except Exception as e:
            return {'error': str(e)}

    async def search_social_media(self, username: str, platform: str = None) -> Dict[str, Any]:
        """Search for a user on one or all supported social platforms.

        Args:
            username: Account name to look up.
            platform: One of ``SOCIAL_PLATFORMS``; ``None`` or ``"all"``
                queries every supported platform.

        Returns:
            A dict keyed by platform name. An unsupported platform or a
            failing lookup yields an ``{'error': ...}`` value for that key.
        """
        results = {}

        if platform and platform != 'all':
            platforms = [platform]
        else:
            platforms = list(self.SOCIAL_PLATFORMS)

        handlers = {
            'instagram': self.search_instagram,
            'twitter': self.search_twitter,
            'reddit': self.search_reddit,
        }

        for name in platforms:
            handler = handlers.get(name)
            if handler is None:
                results[name] = {'error': f"Unsupported platform: {name}"}
                continue
            try:
                results[name] = await handler(username)
            except Exception as e:
                results[name] = {'error': str(e)}

        return results

    async def search_instagram(self, username: str) -> Dict[str, Any]:
        """Fetch public Instagram profile fields for ``username``, or an
        ``'error'`` entry if the profile cannot be loaded."""
        try:
            profile = instaloader.Profile.from_username(self.instagram.context, username)
            return {
                'username': profile.username,
                'full_name': profile.full_name,
                'biography': profile.biography,
                'followers': profile.followers,
                'following': profile.followees,
                'is_private': profile.is_private,
                'is_verified': profile.is_verified,
                'external_url': profile.external_url,
                'posts_count': profile.mediacount,
                'profile_pic_url': profile.profile_pic_url
            }
        except Exception as e:
            return {'error': str(e)}

    async def search_twitter(self, username: str) -> Dict[str, Any]:
        """Placeholder: Twitter lookup has no implementation yet."""
        return {'error': 'Twitter search is not implemented'}

    async def search_reddit(self, username: str) -> Dict[str, Any]:
        """Placeholder: Reddit lookup has no implementation yet."""
        return {'error': 'Reddit search is not implemented'}

    async def search_historical_data(self, url: str) -> List[Dict[str, Any]]:
        """List Wayback Machine snapshots of ``url``.

        Returns one dict per snapshot; if the query fails, an ``'error'``
        entry is appended to whatever was collected so far.
        """
        results = []

        try:
            user_agent = "Mozilla/5.0"
            cdx = WaybackMachineCDXServerAPI(url, user_agent)

            # NOTE(review): every snapshot is collected; heavily archived
            # URLs may produce very long lists.
            for snapshot in cdx.snapshots():
                results.append({
                    'timestamp': snapshot.timestamp,
                    'url': snapshot.archive_url,
                    'status': snapshot.status_code,
                    'mime_type': snapshot.mime_type
                })

        except Exception as e:
            results.append({'error': str(e)})

        return results

    def solve_captcha(self, image_url: str) -> str:
        """Download a CAPTCHA image and return a placeholder solution.

        No solving logic exists yet; the image is only fetched and opened.
        Returns ``"Error: <message>"`` if that fails.
        """
        try:
            response = requests.get(image_url, timeout=self.REQUEST_TIMEOUT)
            Image.open(BytesIO(response.content))
            # Add your CAPTCHA solving logic here
            return "CAPTCHA solution placeholder"
        except Exception as e:
            return f"Error: {str(e)}"
285
+
286
+ # Helper function to create document from gathered information
287
def create_report(data: Dict[str, Any], template: str = "default") -> str:
    """Create a Markdown report from gathered information.

    Each top-level key of ``data`` becomes a ``##`` section. Dict content is
    rendered as ``* key: value`` bullets, list content as one bullet per
    item, and anything else as a plain line.

    Args:
        data: Mapping of section name to section content.
        template: Report template name; only ``"default"`` exists.

    Returns:
        The report text, starting with a title and a generation timestamp.

    Raises:
        ValueError: If ``template`` is not ``"default"``.
    """
    if template != "default":
        raise ValueError(f"Template '{template}' not found")

    # Collect fragments and join once instead of growing a string in a loop.
    parts = [
        "# OSINT Investigation Report\n\n",
        f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
    ]

    for section, content in data.items():
        # str() keeps non-string section keys from failing on .title().
        parts.append(f"## {str(section).title()}\n")
        if isinstance(content, dict):
            parts.extend(f"* {key}: {value}\n" for key, value in content.items())
        elif isinstance(content, list):
            parts.extend(f"* {item}\n" for item in content)
        else:
            parts.append(f"{content}\n")
        parts.append("\n")

    return "".join(parts)
requirements.txt CHANGED
@@ -8,3 +8,21 @@ sentence-transformers==2.2.2
8
  lxml==4.9.3
9
  requests==2.31.0
10
  protobuf==4.25.1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  lxml==4.9.3
9
  requests==2.31.0
10
  protobuf==4.25.1
11
+ pillow==10.1.0
12
+ selenium==4.15.2
13
+ webdriver-manager==4.0.1
14
+ socid-extractor==0.0.24
15
+ holehe==1.61
16
+ sherlock3==0.1
17
+ python-magic==0.4.27
18
+ face-recognition==1.3.0
19
+ opencv-python-headless==4.8.1.78
20
+ googlesearch-python==1.2.3
21
+ instaloader==4.10.1
22
+ tweepy==4.14.0
23
+ praw==7.7.1
24
+ geopy==2.4.1
25
+ phonenumbers==8.13.24
26
+ python-whois==0.8.0
27
+ aiohttp==3.9.1
28
+ waybackpy==3.0.6
search_engine.py CHANGED
@@ -8,6 +8,7 @@ import time
8
  import json
9
  import os
10
  from urllib.parse import urlparse
 
11
 
12
  class ModelManager:
13
  """Manages AI models for text processing"""
@@ -59,6 +60,36 @@ class ContentProcessor:
59
  print(f"Error processing content: {e}")
60
  return {"summary": content[:500] + "...", "insights": []}
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  class WebSearchEngine:
63
  """Main search engine class"""
64
  def __init__(self):
@@ -66,6 +97,7 @@ class WebSearchEngine:
66
  self.session = requests.Session()
67
  self.request_delay = 1.0
68
  self.last_request_time = 0
 
69
 
70
  def is_valid_url(self, url: str) -> bool:
71
  """Check if URL is valid for crawling"""
@@ -148,9 +180,41 @@ class WebSearchEngine:
148
  except Exception as e:
149
  print(f"Error in search: {e}")
150
  return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
  # Main search function
153
  def search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
154
  """Main search function"""
155
  engine = WebSearchEngine()
156
  return engine.search(query, max_results)
 
 
 
 
 
 
 
8
  import json
9
  import os
10
  from urllib.parse import urlparse
11
+ import asyncio
12
 
13
  class ModelManager:
14
  """Manages AI models for text processing"""
 
60
  print(f"Error processing content: {e}")
61
  return {"summary": content[:500] + "...", "insights": []}
62
 
63
# Prefer the real engine from osint_engine.py. The local class below is only
# a fallback for when that module (or one of its dependencies) cannot be
# imported, so that plain web search keeps working.
try:
    from osint_engine import OSINTEngine
except ImportError:
    class OSINTEngine:
        """Fallback OSINT engine used when ``osint_engine`` is unavailable.

        Every method raises ``NotImplementedError`` with a descriptive
        message instead of silently returning ``None``.
        """

        async def search_username(self, query: str) -> Dict[str, Any]:
            """Search for usernames (unavailable in the fallback)."""
            raise NotImplementedError("Username search is not available")

        async def search_image(self, query: str) -> Dict[str, Any]:
            """Search for images (unavailable in the fallback)."""
            raise NotImplementedError("Image search is not available")

        async def search_social_media(self, query: str, platform: str) -> Dict[str, Any]:
            """Search for social media profiles (unavailable in the fallback)."""
            raise NotImplementedError("Social media search is not available")

        async def gather_personal_info(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
            """Gather personal information (unavailable in the fallback)."""
            raise NotImplementedError("Personal information search is not available")

        async def search_historical_data(self, query: str) -> Dict[str, Any]:
            """Search for historical data (unavailable in the fallback)."""
            raise NotImplementedError("Historical data search is not available")
92
+
93
  class WebSearchEngine:
94
  """Main search engine class"""
95
  def __init__(self):
 
97
  self.session = requests.Session()
98
  self.request_delay = 1.0
99
  self.last_request_time = 0
100
+ self.osint_engine = OSINTEngine() # Add OSINT engine
101
 
102
  def is_valid_url(self, url: str) -> bool:
103
  """Check if URL is valid for crawling"""
 
180
  except Exception as e:
181
  print(f"Error in search: {e}")
182
  return []
183
+
184
async def advanced_search(self, query: str, search_type: str = "web", **kwargs) -> Dict[str, Any]:
    """Perform a search of the given type and wrap the result in a dict.

    Args:
        query: Search text, username or URL, depending on ``search_type``.
        search_type: One of ``"web"``, ``"username"``, ``"image"``,
            ``"social"``, ``"personal"`` or ``"historical"``.
        **kwargs: Extra options. ``max_results`` (web) and ``platform``
            (social) are read here; for ``"personal"`` all non-empty
            options are passed to the OSINT engine.

    Returns:
        A dict with a single key naming the result kind, or an ``"error"``
        key when the type is unsupported or the search raises.
    """
    results = {}

    try:
        if search_type == "web":
            results["web"] = self.search(query, kwargs.get("max_results", 5))
        elif search_type == "username":
            results["osint"] = await self.osint_engine.search_username(query)
        elif search_type == "image":
            results["image"] = await self.osint_engine.search_image(query)
        elif search_type == "social":
            results["social"] = await self.osint_engine.search_social_media(
                query,
                kwargs.get("platform")
            )
        elif search_type == "personal":
            # Forward only fields that actually carry a value; blank form
            # inputs must not be analysed as if they were real data.
            provided = {k: v for k, v in kwargs.items() if v not in (None, "")}
            results["personal"] = await self.osint_engine.gather_personal_info(provided)
        elif search_type == "historical":
            results["historical"] = await self.osint_engine.search_historical_data(query)
        else:
            # Report unknown types instead of returning an empty result.
            results["error"] = f"Unsupported search type: {search_type}"

    except Exception as e:
        results["error"] = str(e)

    return results
209
 
210
  # Main search function
211
def search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
    """Run a web search for ``query`` on a freshly created WebSearchEngine.

    Returns whatever ``WebSearchEngine.search`` returns for at most
    ``max_results`` results.
    """
    return WebSearchEngine().search(query, max_results)
215
+
216
+ # Main advanced search function
217
async def advanced_search(query: str, search_type: str = "web", **kwargs) -> Dict[str, Any]:
    """Run an advanced search on a freshly created WebSearchEngine.

    ``query``, ``search_type`` and any extra keyword options are forwarded
    unchanged to ``WebSearchEngine.advanced_search``.
    """
    return await WebSearchEngine().advanced_search(query, search_type, **kwargs)