Spaces:

tensor-boy
/

ISE

Runtime error

App Files Files Community

fikird committed on Nov 29, 2024

Commit

12d42ff

1 Parent(s): a23c67c

Add OSINT capabilities and advanced search features

Browse files

Files changed (4) hide show

app.py +157 -37
osint_engine.py +307 -0
requirements.txt +18 -0
search_engine.py +64 -0

app.py CHANGED Viewed

@@ -1,11 +1,15 @@
 import gradio as gr
-from search_engine import search
-def safe_search(query, max_results=5):
-    try:
-        results = search(query, max_results)
-        formatted_results = []
         for result in results:
             formatted_result = f"""
 ### [{result['title']}]({result['url']})
@@ -16,42 +20,158 @@ def safe_search(query, max_results=5):
 **Published:** {result.get('published_date', 'N/A')}
             """
             formatted_results.append(formatted_result)
         return "\n---\n".join(formatted_results)
     except Exception as e:
         return f"Error: {str(e)}"
 # Create Gradio interface
-demo = gr.Interface(
-    fn=safe_search,
-    inputs=[
-        gr.Textbox(
-            label="Search Query",
-            placeholder="Enter your search query...",
-            lines=2
-        ),
-        gr.Slider(
-            minimum=1,
-            maximum=10,
-            value=5,
-            step=1,
-            label="Number of Results"
-        )
-    ],
-    outputs=gr.Markdown(label="Search Results"),
-    title="🔍 Intelligent Search Engine",
-    description="""
-    An AI-powered search engine that provides intelligent summaries and insights from web content.
     Features:
-    - Smart content summarization
-    - Semantic search capabilities
-    - Clean, readable results
-    """,
-    examples=[
-        ["Latest developments in artificial intelligence", 3],
-        ["Climate change solutions", 5],
-        ["Space exploration news", 4]
-    ],
-    theme=gr.themes.Soft()
-)

 import gradio as gr
+import asyncio
+from search_engine import search, advanced_search
+from osint_engine import create_report
+def format_results(results):
+    if not results:
+        return "No results found."
+    if isinstance(results, list):
+        # Format web search results
+        formatted_results = []
         for result in results:
             formatted_result = f"""
 ### [{result['title']}]({result['url']})
 **Published:** {result.get('published_date', 'N/A')}
             """
             formatted_results.append(formatted_result)
         return "\n---\n".join(formatted_results)
+    elif isinstance(results, dict):
+        # Format OSINT results
+        return create_report(results)
+    else:
+        return str(results)
+def safe_search(query, search_type="web", max_results=5, platform=None,
+                image_url=None, phone=None, location=None, domain=None):
+    """Run a search of the requested type and return display-ready text.
+
+    "web" searches call search() directly; every other search_type is
+    dispatched to the advanced_search() coroutine. Any exception is turned
+    into an "Error: ..." string so the UI callback never raises.
+    """
+    try:
+        if search_type == "web":
+            results = search(query, max_results)
+        else:
+            # Extra fields are forwarded as keyword arguments to advanced_search()
+            kwargs = {
+                "max_results": max_results,
+                "platform": platform,
+                "phone": phone,
+                "location": location,
+                "domain": domain
+            }
+            # Image searches use the image URL as the query
+            if search_type == "image" and image_url:
+                query = image_url
+            # asyncio.run() creates a fresh loop and always closes it, even
+            # when the coroutine raises, so no loop is leaked or left
+            # installed in a closed state.
+            results = asyncio.run(advanced_search(query, search_type, **kwargs))
+        return format_results(results)
     except Exception as e:
         return f"Error: {str(e)}"
 # Create Gradio interface
+# Build the tabbed UI; every button callback goes through safe_search(),
+# which returns Markdown/text for the tab's output component.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🔍 Intelligent Search Engine")
+    gr.Markdown("""
+    An AI-powered search engine with advanced OSINT capabilities.
     Features:
+    - Web search with AI summaries
+    - Username search across platforms
+    - Image search and analysis
+    - Social media profile search
+    - Personal information gathering
+    - Historical data search
+    """)
+    # Web search: query text plus result count, search_type "web"
+    with gr.Tab("Web Search"):
+        with gr.Row():
+            query_input = gr.Textbox(
+                label="Search Query",
+                placeholder="Enter your search query...",
+                lines=2
+            )
+            max_results = gr.Slider(
+                minimum=1,
+                maximum=10,
+                value=5,
+                step=1,
+                label="Number of Results"
+            )
+        search_button = gr.Button("Search")
+        results_output = gr.Markdown(label="Search Results")
+        search_button.click(
+            fn=lambda q, n: safe_search(q, "web", n),
+            inputs=[query_input, max_results],
+            outputs=results_output
+        )
+    # Username search: the username is the query, search_type "username"
+    with gr.Tab("Username Search"):
+        username_input = gr.Textbox(
+            label="Username",
+            placeholder="Enter username to search..."
+        )
+        username_button = gr.Button("Search Username")
+        username_output = gr.Markdown(label="Username Search Results")
+        username_button.click(
+            fn=lambda u: safe_search(u, "username"),
+            inputs=username_input,
+            outputs=username_output
+        )
+    # Image search: the URL is passed both as the query and as image_url
+    with gr.Tab("Image Search"):
+        image_url = gr.Textbox(
+            label="Image URL",
+            placeholder="Enter image URL to search..."
+        )
+        image_button = gr.Button("Search Image")
+        image_output = gr.Markdown(label="Image Search Results")
+        image_button.click(
+            fn=lambda u: safe_search(u, "image", image_url=u),
+            inputs=image_url,
+            outputs=image_output
+        )
+    # Social media search: username plus the selected platform
+    # (the dropdown's default value "all" is forwarded as-is)
+    with gr.Tab("Social Media Search"):
+        with gr.Row():
+            social_username = gr.Textbox(
+                label="Username",
+                placeholder="Enter username..."
+            )
+            platform = gr.Dropdown(
+                choices=["all", "instagram", "twitter", "reddit"],
+                value="all",
+                label="Platform"
+            )
+        social_button = gr.Button("Search Social Media")
+        social_output = gr.Markdown(label="Social Media Results")
+        social_button.click(
+            fn=lambda u, p: safe_search(u, "social", platform=p),
+            inputs=[social_username, platform],
+            outputs=social_output
+        )
+    # Personal info: the query is an empty string; phone, location and
+    # domain are passed as keyword arguments
+    with gr.Tab("Personal Info"):
+        with gr.Row():
+            phone = gr.Textbox(label="Phone Number", placeholder="+1234567890")
+            location = gr.Textbox(label="Location", placeholder="City, Country")
+            domain = gr.Textbox(label="Domain", placeholder="example.com")
+        personal_button = gr.Button("Gather Information")
+        personal_output = gr.Markdown(label="Personal Information Results")
+        personal_button.click(
+            fn=lambda p, l, d: safe_search("", "personal", phone=p, location=l, domain=d),
+            inputs=[phone, location, domain],
+            outputs=personal_output
+        )
+    # Historical data: the URL is the query, search_type "historical"
+    with gr.Tab("Historical Data"):
+        url_input = gr.Textbox(
+            label="URL",
+            placeholder="Enter URL to search historical data..."
+        )
+        historical_button = gr.Button("Search Historical Data")
+        historical_output = gr.Markdown(label="Historical Data Results")
+        historical_button.click(
+            fn=lambda u: safe_search(u, "historical"),
+            inputs=url_input,
+            outputs=historical_output
+        )
+    # Static usage hints shown below the tabs
+    gr.Markdown("""
+    ### Examples
+    Try these example searches:
+    - Web Search: "Latest developments in artificial intelligence"
+    - Username: "johndoe"
+    - Image URL: "https://example.com/image.jpg"
+    - Social Media: "techuser" on Twitter
+    - Historical Data: "example.com"
+    """)
+# Launch the app only when this file is run as a script
+if __name__ == "__main__":
+    demo.launch()

osint_engine.py ADDED Viewed

	@@ -0,0 +1,307 @@

+import os
+import re
+import json
+import time
+import asyncio
+import aiohttp
+import requests
+import instaloader
+import face_recognition
+import numpy as np
+from PIL import Image
+from io import BytesIO
+from typing import Dict, List, Any, Union
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
+from holehe.core import *
+from sherlock import sherlock
+from geopy.geocoders import Nominatim
+from waybackpy import WaybackMachineCDXServerAPI
+import phonenumbers
+from phonenumbers import geocoder, carrier, timezone
+import whois
+from datetime import datetime
+class OSINTEngine:
+    """OSINT capabilities for advanced information gathering.
+
+    Groups username, e-mail, image, phone, location, domain, social-media
+    and Wayback Machine lookups. Most lookups catch their own failures and
+    report them as an ``{'error': message}`` entry instead of raising.
+    """
+    def __init__(self):
+        # Headless Chrome flags used by reverse_image_search()
+        self.chrome_options = Options()
+        self.chrome_options.add_argument('--headless')
+        self.chrome_options.add_argument('--no-sandbox')
+        self.chrome_options.add_argument('--disable-dev-shm-usage')
+        self.setup_apis()
+    def setup_apis(self):
+        """Initialize API clients (Instaloader and the Nominatim geocoder)"""
+        self.instagram = instaloader.Instaloader()
+        self.geolocator = Nominatim(user_agent="intelligent_search")
+    async def search_username(self, username: str) -> Dict[str, Any]:
+        """Search for username across multiple platforms.
+
+        Returns a dict with 'platforms' (hits from sherlock_search) and
+        'email' (search_email results for "<username>@gmail.com").
+        """
+        results = {}
+        # Sherlock search
+        results['platforms'] = await self.sherlock_search(username)
+        # Email search; the gmail.com address is a guess built from the username
+        results['email'] = await self.search_email(f"{username}@gmail.com")
+        return results
+    async def sherlock_search(self, username: str) -> List[Dict[str, Any]]:
+        """Check the username against every site in Sherlock's site data.
+
+        All checks run concurrently on one HTTP session; only sites that
+        reported a hit are returned.
+        """
+        sites = sherlock.site_data()
+        async with aiohttp.ClientSession() as session:
+            checks = [
+                self.check_username(session, username, site_name, site_data)
+                for site_name, site_data in sites.items()
+            ]
+            outcomes = await asyncio.gather(*checks)
+        return [hit for hit in outcomes if hit is not None]
+    async def check_username(self, session, username: str, site_name: str, site_data: Dict) -> Union[Dict[str, Any], None]:
+        """Check username on a specific platform.
+
+        Returns a dict with 'platform', 'url' and 'found' when the profile
+        URL answers with HTTP 200, otherwise None.
+        """
+        template = site_data.get('url', '')
+        if not template:
+            return None
+        try:
+            # Supply the username positionally and by keyword so both "{}"
+            # and "{username}" placeholders are filled in. Formatting happens
+            # inside the try so a malformed template cannot abort the gather
+            # in sherlock_search().
+            url = template.format(username, username=username)
+            async with session.get(url) as response:
+                if response.status == 200:
+                    return {
+                        'platform': site_name,
+                        'url': url,
+                        'found': True
+                    }
+        except Exception:
+            # Best effort: an unreachable or misbehaving site counts as
+            # "not found". Unlike a bare except, task cancellation still
+            # propagates.
+            pass
+        return None
+    async def search_email(self, email: str) -> Dict[str, Any]:
+        """Search for email presence on various platforms.
+
+        Returns a dict keyed by module name for every module that produced
+        a truthy result; modules that fail are skipped.
+        """
+        results = {}
+        modules = get_functions()
+        for module in modules:
+            try:
+                out = await module(email)
+                if out:
+                    results[module.__name__] = out
+            except Exception:
+                # Skip failing modules without swallowing cancellation
+                continue
+        return results
+    async def search_image(self, image_url: str) -> Dict[str, Any]:
+        """Reverse image search and face detection.
+
+        Returns 'faces_found', 'face_locations' and 'reverse_search' on
+        success; on any failure the dict carries an 'error' message.
+        """
+        results = {}
+        try:
+            # Download image; the timeout keeps a stalled server from
+            # hanging the request forever
+            response = requests.get(image_url, timeout=30)
+            response.raise_for_status()
+            img = Image.open(BytesIO(response.content))
+            # Normalise to RGB first: palette/alpha images would otherwise
+            # yield an array layout the face detector does not accept
+            img_array = np.array(img.convert('RGB'))
+            face_locations = face_recognition.face_locations(img_array)
+            results['faces_found'] = len(face_locations)
+            results['face_locations'] = face_locations
+            # Perform reverse image search
+            results['reverse_search'] = await self.reverse_image_search(image_url)
+        except Exception as e:
+            results['error'] = str(e)
+        return results
+    async def reverse_image_search(self, image_url: str) -> List[Dict[str, str]]:
+        """Perform reverse image search through Google Lens.
+
+        Returns a one-element list holding either the resulting Lens URL or
+        an 'error' message.
+        """
+        from urllib.parse import quote
+        results = []
+        try:
+            driver = webdriver.Chrome(
+                service=Service(ChromeDriverManager().install()),
+                options=self.chrome_options
+            )
+            try:
+                # Percent-encode the target so its own "?" and "&" do not
+                # corrupt the Lens query string
+                search_url = f"https://lens.google.com/uploadbyurl?url={quote(image_url, safe='')}"
+                driver.get(search_url)
+                # Non-blocking pause before reading the final URL
+                # (presumably to let the page settle)
+                await asyncio.sleep(3)
+                # Extract results (simplified)
+                results.append({
+                    'source': 'Google Lens',
+                    'url': driver.current_url
+                })
+            finally:
+                # Always shut the browser down, even when navigation fails
+                driver.quit()
+        except Exception as e:
+            results.append({'error': str(e)})
+        return results
+    async def gather_personal_info(self, data: Dict[str, str]) -> Dict[str, Any]:
+        """Gather personal information from various sources.
+
+        Only the 'phone', 'location' and 'domain' entries of data are read,
+        and each is analysed only when it holds a non-empty value, so None
+        or "" fields are skipped.
+        """
+        results = {}
+        phone = data.get('phone')
+        if phone:
+            results['phone'] = self.analyze_phone_number(phone)
+        location = data.get('location')
+        if location:
+            results['location'] = await self.analyze_location(location)
+        domain = data.get('domain')
+        if domain:
+            results['domain'] = self.analyze_domain(domain)
+        return results
+    def analyze_phone_number(self, phone: str) -> Dict[str, Any]:
+        """Analyze phone number information.
+
+        Returns validity, number type, country description, carrier and time
+        zones, or ``{'error': message}`` when parsing or a lookup fails.
+        """
+        try:
+            number = phonenumbers.parse(phone)
+            return {
+                'valid': phonenumbers.is_valid_number(number),
+                'type': phonenumbers.number_type(number),
+                'country': geocoder.description_for_number(number, "en"),
+                'carrier': carrier.name_for_number(number, "en"),
+                'timezone': timezone.time_zones_for_number(number)
+            }
+        except Exception as e:
+            return {'error': str(e)}
+    async def analyze_location(self, location: str) -> Union[Dict[str, Any], None]:
+        """Analyze location information.
+
+        Returns address, coordinates and the raw geocoder payload; None when
+        the geocoder finds no match; ``{'error': message}`` on failure.
+        """
+        try:
+            location_data = self.geolocator.geocode(location)
+            if location_data:
+                return {
+                    'address': location_data.address,
+                    'latitude': location_data.latitude,
+                    'longitude': location_data.longitude,
+                    'raw': location_data.raw
+                }
+        except Exception as e:
+            return {'error': str(e)}
+        return None
+    def analyze_domain(self, domain: str) -> Dict[str, Any]:
+        """Analyze domain information via WHOIS.
+
+        Returns registrar, dates and status, or ``{'error': message}``.
+        """
+        try:
+            domain_info = whois.whois(domain)
+            return {
+                'registrar': domain_info.registrar,
+                'creation_date': domain_info.creation_date,
+                'expiration_date': domain_info.expiration_date,
+                'last_updated': domain_info.updated_date,
+                'status': domain_info.status
+            }
+        except Exception as e:
+            return {'error': str(e)}
+    async def search_social_media(self, username: str, platform: Union[str, None] = None) -> Dict[str, Any]:
+        """Search for user information on social media platforms.
+
+        platform may name a single platform; None, "" or "all" searches
+        instagram, twitter and reddit. Results are keyed by platform name,
+        and a platform whose lookup fails gets an ``{'error': message}``.
+        """
+        results = {}
+        # "all" is a wildcard rather than a platform name
+        if platform and platform != 'all':
+            platforms = [platform]
+        else:
+            platforms = ['instagram', 'twitter', 'reddit']
+        for name in platforms:
+            try:
+                if name == 'instagram':
+                    results['instagram'] = await self.search_instagram(username)
+                elif name == 'twitter':
+                    # NOTE(review): this class defines no search_twitter, so
+                    # this branch ends up in the except clause below
+                    results['twitter'] = await self.search_twitter(username)
+                elif name == 'reddit':
+                    # NOTE(review): this class defines no search_reddit either
+                    results['reddit'] = await self.search_reddit(username)
+            except Exception as e:
+                results[name] = {'error': str(e)}
+        return results
+    async def search_instagram(self, username: str) -> Dict[str, Any]:
+        """Search Instagram for user information.
+
+        Returns selected profile fields, or ``{'error': message}``.
+        """
+        try:
+            profile = instaloader.Profile.from_username(self.instagram.context, username)
+            return {
+                'username': profile.username,
+                'full_name': profile.full_name,
+                'biography': profile.biography,
+                'followers': profile.followers,
+                'following': profile.followees,
+                'is_private': profile.is_private,
+                'is_verified': profile.is_verified,
+                'external_url': profile.external_url,
+                'posts_count': profile.mediacount,
+                'profile_pic_url': profile.profile_pic_url
+            }
+        except Exception as e:
+            return {'error': str(e)}
+    async def search_historical_data(self, url: str) -> List[Dict[str, Any]]:
+        """Search for historical data using Wayback Machine.
+
+        Returns one dict per snapshot; if the lookup fails part-way, the
+        snapshots collected so far are followed by an 'error' entry.
+        """
+        results = []
+        try:
+            user_agent = "Mozilla/5.0"
+            cdx = WaybackMachineCDXServerAPI(url, user_agent)
+            for snapshot in cdx.snapshots():
+                results.append({
+                    'timestamp': snapshot.timestamp,
+                    'url': snapshot.archive_url,
+                    'status': snapshot.status_code,
+                    'mime_type': snapshot.mime_type
+                })
+        except Exception as e:
+            results.append({'error': str(e)})
+        return results
+    def solve_captcha(self, image_url: str) -> str:
+        """Solve CAPTCHA using OCR (simplified version).
+
+        Currently only downloads and opens the image and returns a fixed
+        placeholder string; failures yield an "Error: ..." string.
+        """
+        try:
+            response = requests.get(image_url)
+            img = Image.open(BytesIO(response.content))
+            # Add your CAPTCHA solving logic here
+            return "CAPTCHA solution placeholder"
+        except Exception as e:
+            return f"Error: {str(e)}"
+# Helper that renders gathered information as a Markdown document
+def create_report(data: Dict[str, Any], template: str = "default") -> str:
+    """Build a Markdown report with one "##" section per top-level key.
+
+    Dict content becomes "* key: value" bullets, list content becomes one
+    bullet per item, and anything else is written as a plain line. Only the
+    "default" template exists; any other name raises ValueError.
+    """
+    if template != "default":
+        raise ValueError(f"Template '{template}' not found")
+    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    pieces = ["# OSINT Investigation Report\n\n", f"Generated on: {stamp}\n\n"]
+    for section, content in data.items():
+        pieces.append(f"## {section.title()}\n")
+        if isinstance(content, dict):
+            pieces.extend(f"* {key}: {value}\n" for key, value in content.items())
+        elif isinstance(content, list):
+            pieces.extend(f"* {item}\n" for item in content)
+        else:
+            pieces.append(f"{content}\n")
+        # Blank line closes every section
+        pieces.append("\n")
+    return "".join(pieces)

requirements.txt CHANGED Viewed

@@ -8,3 +8,21 @@ sentence-transformers==2.2.2
 lxml==4.9.3
 requests==2.31.0
 protobuf==4.25.1

 lxml==4.9.3
 requests==2.31.0
 protobuf==4.25.1
+pillow==10.1.0
+selenium==4.15.2
+webdriver-manager==4.0.1
+socid-extractor==0.0.24
+holehe==1.61
+sherlock3==0.1
+python-magic==0.4.27
+face-recognition==1.3.0
+opencv-python-headless==4.8.1.78
+googlesearch-python==1.2.3
+instaloader==4.10.1
+tweepy==4.14.0
+praw==7.7.1
+geopy==2.4.1
+phonenumbers==8.13.24
+python-whois==0.8.0
+aiohttp==3.9.1
+waybackpy==3.0.6

search_engine.py CHANGED Viewed

@@ -8,6 +8,7 @@ import time
 import json
 import os
 from urllib.parse import urlparse
 class ModelManager:
     """Manages AI models for text processing"""
@@ -59,6 +60,36 @@ class ContentProcessor:
             print(f"Error processing content: {e}")
             return {"summary": content[:500] + "...", "insights": []}
 class WebSearchEngine:
     """Main search engine class"""
     def __init__(self):
@@ -66,6 +97,7 @@ class WebSearchEngine:
         self.session = requests.Session()
         self.request_delay = 1.0
         self.last_request_time = 0
     def is_valid_url(self, url: str) -> bool:
         """Check if URL is valid for crawling"""
@@ -148,9 +180,41 @@ class WebSearchEngine:
         except Exception as e:
             print(f"Error in search: {e}")
             return []
 # Main search function
 def search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
     """Main search function"""
     engine = WebSearchEngine()
     return engine.search(query, max_results)

 import json
 import os
 from urllib.parse import urlparse
+import asyncio
 class ModelManager:
     """Manages AI models for text processing"""
             print(f"Error processing content: {e}")
             return {"summary": content[:500] + "...", "insights": []}
+class OSINTEngine:
+    """Main OSINT engine class.
+
+    Placeholder implementation: every coroutine below has ``pass`` as its
+    only statement, so awaiting any of them yields None rather than the
+    annotated dict.
+    NOTE(review): the same commit adds a full OSINTEngine in osint_engine.py;
+    confirm which one WebSearchEngine is meant to use.
+    """
+    def __init__(self):
+        pass
+    async def search_username(self, query: str) -> Dict[str, Any]:
+        """Search for usernames (not implemented; returns None)"""
+        # Implement username search logic here
+        pass
+    async def search_image(self, query: str) -> Dict[str, Any]:
+        """Search for images (not implemented; returns None)"""
+        # Implement image search logic here
+        pass
+    async def search_social_media(self, query: str, platform: str) -> Dict[str, Any]:
+        """Search for social media profiles (not implemented; returns None)"""
+        # Implement social media search logic here
+        pass
+    async def gather_personal_info(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """Gather personal information (not implemented; returns None)"""
+        # Implement personal info gathering logic here
+        # Note: kwargs is a single positional dict, not **kwargs
+        pass
+    async def search_historical_data(self, query: str) -> Dict[str, Any]:
+        """Search for historical data (not implemented; returns None)"""
+        # Implement historical data search logic here
+        pass
 class WebSearchEngine:
     """Main search engine class"""
     def __init__(self):
         self.session = requests.Session()
         self.request_delay = 1.0
         self.last_request_time = 0
+        self.osint_engine = OSINTEngine()  # Add OSINT engine
     def is_valid_url(self, url: str) -> bool:
         """Check if URL is valid for crawling"""
         except Exception as e:
             print(f"Error in search: {e}")
             return []
+    async def advanced_search(self, query: str, search_type: str = "web", **kwargs) -> Dict[str, Any]:
+        """Perform advanced search based on type.
+
+        search_type selects the backend: "web", "username", "image",
+        "social", "personal" or "historical". The outcome is stored under a
+        key named after the branch ("web", "osint", "image", "social",
+        "personal", "historical"). An unrecognised search_type yields an
+        empty dict, and any exception is reported under "error" instead of
+        being raised.
+        """
+        results = {}
+        try:
+            if search_type == "web":
+                results["web"] = self.search(query, kwargs.get("max_results", 5))
+            elif search_type == "username":
+                results["osint"] = await self.osint_engine.search_username(query)
+            elif search_type == "image":
+                results["image"] = await self.osint_engine.search_image(query)
+            elif search_type == "social":
+                results["social"] = await self.osint_engine.search_social_media(
+                    query,
+                    kwargs.get("platform")
+                )
+            elif search_type == "personal":
+                # Callers fill unused fields with None or "", so drop those
+                # to keep the engine from analysing fields that were never
+                # supplied.
+                supplied = {
+                    key: value
+                    for key, value in kwargs.items()
+                    if value not in (None, "")
+                }
+                results["personal"] = await self.osint_engine.gather_personal_info(supplied)
+            elif search_type == "historical":
+                results["historical"] = await self.osint_engine.search_historical_data(query)
+        except Exception as e:
+            results["error"] = str(e)
+        return results
 # Entry point for plain web searches
 def search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
     """Run the query on a newly created WebSearchEngine and return its results"""
     web_engine = WebSearchEngine()
     found = web_engine.search(query, max_results)
     return found
+# Entry point for typed (web / OSINT) searches
+async def advanced_search(query: str, search_type: str = "web", **kwargs) -> Dict[str, Any]:
+    """Await WebSearchEngine.advanced_search on a newly created engine"""
+    outcome = await WebSearchEngine().advanced_search(query, search_type, **kwargs)
+    return outcome