Spaces:

Merlintxu
/

SEO

Sleeping

App Files Community

Merlintxu committed on 19 days ago

Commit

a8a2139

verified ·

1 Parent(s): efd8ac1

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -488

app.py CHANGED Viewed

@@ -1,448 +1,51 @@
-import os
 import json
-import logging
-import re
-import requests
-import hashlib
-import PyPDF2
-import numpy as np
-import pandas as pd
-from io import BytesIO
-from typing import List, Dict, Optional, Tuple, Any
-from urllib.parse import urlparse, urljoin
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from bs4 import BeautifulSoup
-from pathlib import Path
-from datetime import datetime
-from collections import defaultdict
-from sklearn.feature_extraction.text import TfidfVectorizer
-from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
-from transformers import pipeline
-from sentence_transformers import SentenceTransformer
-import torch
 import subprocess
 import sys
-import spacy
-import gradio as gr
-import matplotlib.pyplot as plt
-# Configuración de logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
 logger = logging.getLogger(__name__)
-def sanitize_filename(filename: str) -> str:
-    """
-    Sanitiza el nombre de un archivo eliminando o reemplazando caracteres no permitidos.
-    """
-    filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
-    filename = re.sub(r'\s+', '_', filename)
-    return filename
-class SEOSpaceAnalyzer:
     """
-    Clase principal que encapsula la lógica para analizar un sitio web a partir de su sitemap.
     """
-    def __init__(self, max_urls: int = 20, max_workers: int = 4) -> None:
-        """
-        Inicializa la sesión, carga los modelos y configura parámetros.
-        :param max_urls: Número máximo de URLs a procesar en un análisis.
-        :param max_workers: Número de hilos para la ejecución concurrente.
-        """
-        self.max_urls = max_urls
-        self.max_workers = max_workers
-        self.session = self._configure_session()
-        self.models = self._load_models()
-        self.base_dir = Path("content_storage")
-        self.base_dir.mkdir(parents=True, exist_ok=True)
-        self.current_analysis: Dict[str, Any] = {}
-    def _load_models(self) -> Dict[str, Any]:
-        """Carga modelos optimizados para Hugging Face y spaCy."""
-        try:
-            device = 0 if torch.cuda.is_available() else -1
-            logger.info("Cargando modelos NLP...")
-            models = {
-                'summarizer': pipeline("summarization", model="facebook/bart-large-cnn", device=device),
-                'ner': pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple", device=device),
-                'semantic': SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2'),
-                'spacy': spacy.load("es_core_news_lg")
-            }
-            logger.info("Modelos cargados correctamente.")
-            return models
-        except Exception as e:
-            logger.error(f"Error cargando modelos: {e}")
-            raise
-    def _configure_session(self) -> requests.Session:
-        """Configura una sesión HTTP con reintentos y headers personalizados."""
-        session = requests.Session()
-        retry = Retry(
-            total=3,
-            backoff_factor=1,
-            status_forcelist=[500, 502, 503, 504],
-            allowed_methods=['GET', 'HEAD']
-        )
-        adapter = HTTPAdapter(max_retries=retry)
-        session.mount('http://', adapter)
-        session.mount('https://', adapter)
-        session.headers.update({
-            'User-Agent': 'Mozilla/5.0 (compatible; SEOBot/1.0)',
-            'Accept-Language': 'es-ES,es;q=0.9'
-        })
-        return session
-    def analyze_sitemap(self, sitemap_url: str) -> Tuple[Dict, List[str], Dict, Dict]:
-        """
-        Analiza un sitemap completo, procesando URLs en paralelo y generando estadísticas, análisis de contenido, enlaces y recomendaciones SEO.
-        :param sitemap_url: URL del sitemap XML.
-        :return: Tuple con estadísticas, recomendaciones, análisis de contenido y análisis de enlaces.
-        """
-        try:
-            logger.info(f"Parseando sitemap: {sitemap_url}")
-            urls = self._parse_sitemap(sitemap_url)
-            if not urls:
-                logger.warning("No se pudieron extraer URLs del sitemap.")
-                return {"error": "No se pudieron extraer URLs del sitemap"}, [], {}, {}
-            results: List[Dict] = []
-            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
-                futures = {executor.submit(self._process_url, url): url for url in urls[:self.max_urls]}
-                for future in as_completed(futures):
-                    url = futures[future]
-                    try:
-                        res = future.result()
-                        results.append(res)
-                        logger.info(f"Procesado: {url}")
-                    except Exception as e:
-                        logger.error(f"Error procesando {url}: {e}")
-                        results.append({'url': url, 'status': 'error', 'error': str(e)})
-            self.current_analysis = {
-                'stats': self._calculate_stats(results),
-                'content_analysis': self._analyze_content(results),
-                'links': self._analyze_links(results),
-                'recommendations': self._generate_seo_recommendations(results),
-                'details': results,  # <-- Aquí se incluyen todos los detalles individuales
-                'timestamp': datetime.now().isoformat()
-            }
-            return (self.current_analysis['stats'],
-                    self.current_analysis['recommendations'],
-                    self.current_analysis['content_analysis'],
-                    self.current_analysis['links']),
-        except Exception as e:
-            logger.error(f"Error en análisis: {e}")
-            return {"error": str(e)}, [], {}, {}
-    def _process_url(self, url: str) -> Dict:
-        """Procesa una URL individual y decide el método de procesamiento según el tipo de contenido."""
-        try:
-            response = self.session.get(url, timeout=15)
-            response.raise_for_status()
-            content_type = response.headers.get('Content-Type', '')
-            result: Dict[str, Any] = {'url': url, 'status': 'success'}
-            if 'application/pdf' in content_type:
-                result.update(self._process_pdf(response.content))
-            elif 'text/html' in content_type:
-                result.update(self._process_html(response.text, url))
-            else:
-                result.update({'type': 'unknown', 'content': '', 'word_count': 0})
-            self._save_content(url, response.content)
-            return result
-        except requests.exceptions.RequestException as e:
-            logger.warning(f"Error procesando {url}: {str(e)}")
-            return {'url': url, 'status': 'error', 'error': str(e)}
-        except Exception as e:
-            logger.error(f"Error inesperado en {url}: {str(e)}")
-            return {'url': url, 'status': 'error', 'error': str(e)}
-    def _process_html(self, html: str, base_url: str) -> Dict:
-        """Procesa contenido HTML: extrae y limpia el texto, enlaces y metadatos."""
-        soup = BeautifulSoup(html, 'html.parser')
-        clean_text = self._clean_text(soup.get_text())
-        return {
-            'type': 'html',
-            'content': clean_text,
-            'word_count': len(clean_text.split()),
-            'links': self._extract_links(soup, base_url),
-            'metadata': self._extract_metadata(soup)
-        }
-    def _process_pdf(self, content: bytes) -> Dict:
-        """Procesa documentos PDF extrayendo texto de cada página."""
-        try:
-            text = ""
-            with BytesIO(content) as pdf_file:
-                reader = PyPDF2.PdfReader(pdf_file)
-                for page in reader.pages:
-                    extracted = page.extract_text()
-                    text += extracted if extracted else ""
-            clean_text = self._clean_text(text)
-            return {
-                'type': 'pdf',
-                'content': clean_text,
-                'word_count': len(clean_text.split()),
-                'page_count': len(reader.pages)
-            }
-        except PyPDF2.PdfReadError as e:
-            logger.error(f"Error leyendo PDF: {e}")
-            return {'type': 'pdf', 'error': str(e)}
-    def _clean_text(self, text: str) -> str:
-        """Realiza la limpieza y normalización del texto."""
-        if not text:
-            return ""
-        text = re.sub(r'\s+', ' ', text)
-        return re.sub(r'[^\w\sáéíóúñÁÉÍÓÚÑ]', ' ', text).strip()
-    def _extract_links(self, soup: BeautifulSoup, base_url: str) -> List[Dict]:
-        """Extrae y clasifica enlaces presentes en el HTML."""
-        links: List[Dict] = []
-        base_netloc = urlparse(base_url).netloc
-        for tag in soup.find_all('a', href=True):
-            try:
-                href = tag['href'].strip()
-                if not href or href.startswith('javascript:'):
-                    continue
-                full_url = urljoin(base_url, href)
-                parsed = urlparse(full_url)
-                links.append({
-                    'url': full_url,
-                    'type': 'internal' if parsed.netloc == base_netloc else 'external',
-                    'anchor': self._clean_text(tag.get_text())[:100],
-                    'file_type': self._get_file_type(parsed.path)
-                })
-            except Exception as e:
-                logger.warning(f"Error procesando enlace {tag.get('href')}: {e}")
-                continue
-        return links
-    def _get_file_type(self, path: str) -> str:
-        """Determina el tipo de archivo según la extensión encontrada en la URL."""
-        ext = Path(path).suffix.lower()
-        return ext[1:] if ext else 'html'
-    def _extract_metadata(self, soup: BeautifulSoup) -> Dict:
-        """Extrae metadatos relevantes para SEO (título, descripción, keywords y etiquetas OpenGraph)."""
-        metadata: Dict[str, Any] = {
-            'title': '',
-            'description': '',
-            'keywords': [],
-            'og': {}
-        }
-        if soup.title and soup.title.string:
-            metadata['title'] = soup.title.string.strip()[:200]
-        for meta in soup.find_all('meta'):
-            name = meta.get('name', '').lower()
-            property_ = meta.get('property', '').lower()
-            content = meta.get('content', '')
-            if name == 'description':
-                metadata['description'] = content[:300]
-            elif name == 'keywords':
-                metadata['keywords'] = [kw.strip() for kw in content.split(',') if kw.strip()]
-            elif property_.startswith('og:'):
-                metadata['og'][property_[3:]] = content
-        return metadata
-    def _parse_sitemap(self, sitemap_url: str) -> List[str]:
-        """
-        Parsea un sitemap XML e incluso maneja índices de sitemaps.
-        :return: Lista de URLs encontradas en el sitemap.
-        """
-        try:
-            response = self.session.get(sitemap_url, timeout=10)
-            response.raise_for_status()
-            if 'xml' not in response.headers.get('Content-Type', ''):
-                logger.warning(f"El sitemap no parece ser XML: {sitemap_url}")
-                return []
-            soup = BeautifulSoup(response.text, 'lxml-xml')
-            urls: List[str] = []
-            # Manejo de sitemap index
-            if soup.find('sitemapindex'):
-                for sitemap in soup.find_all('loc'):
-                    url = sitemap.text.strip()
-                    if url.endswith('.xml'):
-                        urls.extend(self._parse_sitemap(url))
-            else:
-                urls = [loc.text.strip() for loc in soup.find_all('loc')]
-            # Filtrar URLs que empiezan por http y eliminar duplicados
-            filtered_urls = list({url for url in urls if url.startswith('http')})
-            return filtered_urls
-        except Exception as e:
-            logger.error(f"Error al parsear el sitemap {sitemap_url}: {e}")
-            return []
-    def _save_content(self, url: str, content: bytes) -> None:
-        """
-        Almacena el contenido descargado en una estructura organizada. Antes de escribir, verifica si ya existe el archivo.
-        """
-        try:
-            parsed = urlparse(url)
-            domain_dir = self.base_dir / parsed.netloc
-            # Construir ruta a partir de la ruta URL
-            path = parsed.path.lstrip('/')
-            if not path or path.endswith('/'):
-                path = os.path.join(path, 'index.html')
-            safe_path = sanitize_filename(path)
-            save_path = domain_dir / safe_path
-            save_path.parent.mkdir(parents=True, exist_ok=True)
-            # Calcula hash del contenido y evita re-escribir si el archivo existe y es idéntico
-            new_hash = hashlib.md5(content).hexdigest()
-            if save_path.exists():
-                with open(save_path, 'rb') as f:
-                    existing_content = f.read()
-                existing_hash = hashlib.md5(existing_content).hexdigest()
-                if new_hash == existing_hash:
-                    logger.debug(f"El contenido de {url} ya está guardado y es idéntico.")
-                    return
-            with open(save_path, 'wb') as f:
-                f.write(content)
-            logger.info(f"Contenido guardado en: {save_path}")
-        except Exception as e:
-            logger.error(f"Error al guardar contenido para {url}: {e}")
-    def _calculate_stats(self, results: List[Dict]) -> Dict:
-        """Calcula estadísticas básicas sobre el conjunto de resultados procesados."""
-        successful = [r for r in results if r.get('status') == 'success']
-        content_types = [r.get('type', 'unknown') for r in successful]
-        avg_word_count = round(np.mean([r.get('word_count', 0) for r in successful]) if successful else 0, 1)
-        return {
-            'total_urls': len(results),
-            'successful': len(successful),
-            'failed': len(results) - len(successful),
-            'content_types': pd.Series(content_types).value_counts().to_dict(),
-            'avg_word_count': avg_word_count,
-            'failed_urls': [r['url'] for r in results if r.get('status') != 'success']
-        }
-    def _analyze_content(self, results: List[Dict]) -> Dict:
-        """
-        Analiza el contenido extraído usando TF-IDF y muestra algunas muestras.
-        :return: Diccionario con keywords y ejemplos de contenido.
-        """
-        successful = [r for r in results if r.get('status') == 'success' and r.get('content')]
-        texts = [r['content'] for r in successful if len(r['content'].split()) > 10]
-        if not texts:
-            return {'top_keywords': [], 'content_samples': []}
         try:
-            stop_words = list(self.models['spacy'].Defaults.stop_words)
-            vectorizer = TfidfVectorizer(stop_words=stop_words, max_features=50, ngram_range=(1, 2))
-            tfidf = vectorizer.fit_transform(texts)
-            feature_names = vectorizer.get_feature_names_out()
-            sorted_indices = np.argsort(np.asarray(tfidf.sum(axis=0)).ravel())[-10:]
-            top_keywords = feature_names[sorted_indices][::-1].tolist()
-        except Exception as e:
-            logger.error(f"Error en análisis TF-IDF: {e}")
-            top_keywords = []
-        return {
-            'top_keywords': top_keywords,
-            'content_samples': [{'url': r['url'], 'sample': (r['content'][:500] + '...') if len(r['content']) > 500 else r['content']}
-                                for r in successful[:3]]
-        }
-    def _analyze_links(self, results: List[Dict]) -> Dict:
-        """
-        Analiza la estructura de enlaces en el contenido procesado.
-        :return: Estadísticas de enlaces internos, dominios externos, anclas y tipos de archivos.
-        """
-        all_links = []
-        for result in results:
-            if result.get('links'):
-                all_links.extend(result['links'])
-        if not all_links:
-            return {
-                'internal_links': {},
-                'external_domains': {},
-                'common_anchors': {},
-                'file_types': {}
-            }
-        df = pd.DataFrame(all_links)
-        return {
-            'internal_links': df[df['type'] == 'internal']['url'].value_counts().head(20).to_dict(),
-            'external_domains': df[df['type'] == 'external']['url']
-                                .apply(lambda x: urlparse(x).netloc)
-                                .value_counts().head(10).to_dict(),
-            'common_anchors': df['anchor'].value_counts().head(10).to_dict(),
-            'file_types': df['file_type'].value_counts().to_dict()
-        }
-    def _generate_seo_recommendations(self, results: List[Dict]) -> List[str]:
-        """
-        Genera recomendaciones SEO basadas en metadatos, cantidad de contenido y estructura de enlaces.
-        :return: Lista de recomendaciones.
-        """
-        successful = [r for r in results if r.get('status') == 'success']
-        if not successful:
-            return ["No se pudo analizar ningún contenido exitosamente"]
-        recs = []
-        missing_titles = sum(1 for r in successful if not r.get('metadata', {}).get('title'))
-        if missing_titles:
-            recs.append(f"📌 Añadir títulos a {missing_titles} páginas")
-        short_descriptions = sum(1 for r in successful if not r.get('metadata', {}).get('description'))
-        if short_descriptions:
-            recs.append(f"📌 Añadir meta descripciones a {short_descriptions} páginas")
-        short_content = sum(1 for r in successful if r.get('word_count', 0) < 300)
-        if short_content:
-            recs.append(f"📝 Ampliar contenido en {short_content} páginas (menos de 300 palabras)")
-        all_links = [link for r in results for link in r.get('links', [])]
-        if all_links:
-            df_links = pd.DataFrame(all_links)
-            internal_links = df_links[df_links['type'] == 'internal']
-            if len(internal_links) > 100:
-                recs.append(f"🔗 Optimizar estructura de enlaces internos ({len(internal_links)} enlaces)")
-        return recs if recs else ["✅ No se detectaron problemas críticos de SEO"]
-    def _plot_internal_links(self, links_data: Dict) -> Optional[plt.Figure]:
-        """
-        Genera un gráfico de barras para la distribución de enlaces internos.
-        :param links_data: Diccionario con los enlaces internos.
-        :return: Figura de matplotlib o None si no hay datos.
-        """
-        internal_links = links_data.get('internal_links', {})
-        if not internal_links:
-            return None
-        fig, ax = plt.subplots()
-        names = list(internal_links.keys())
-        counts = list(internal_links.values())
-        ax.barh(names, counts)
-        ax.set_xlabel("Cantidad de enlaces")
-        ax.set_title("Top 20 Enlaces Internos")
-        plt.tight_layout()
-        return fig
 def create_interface() -> gr.Blocks:
-    """
-    Crea la interfaz de usuario utilizando Gradio.
-    """
     analyzer = SEOSpaceAnalyzer()
     with gr.Blocks(title="SEO Analyzer Pro", theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # 🕵️ SEO Analyzer Pro
         **Analizador SEO avanzado con modelos de lenguaje**
-        Sube la URL de un sitemap.xml para analizar todo el sitio web.
         """)
         with gr.Row():
             with gr.Column():
-                sitemap_input = gr.Textbox(label="URL del Sitemap",
-                                           placeholder="https://ejemplo.com/sitemap.xml",
-                                           interactive=True)
                 analyze_btn = gr.Button("Analizar Sitio", variant="primary")
                 with gr.Row():
                     clear_btn = gr.Button("Limpiar")
@@ -450,97 +53,48 @@ def create_interface() -> gr.Blocks:
                     plot_btn = gr.Button("Visualizar Enlaces Internos", variant="secondary")
             with gr.Column():
                 status_output = gr.Textbox(label="Estado del Análisis", interactive=False)
-                progress_bar = gr.Progress()
         with gr.Tabs():
             with gr.Tab("📊 Resumen"):
                 stats_output = gr.JSON(label="Estadísticas Generales")
                 recommendations_output = gr.JSON(label="Recomendaciones SEO")
             with gr.Tab("📝 Contenido"):
                 content_output = gr.JSON(label="Análisis de Contenido")
-                gr.Examples(
-                    examples=[{"content": "Ejemplo de análisis de contenido..."}],
-                    inputs=[content_output],
-                    label="Ejemplos de Salida"
-                )
             with gr.Tab("🔗 Enlaces"):
                 links_output = gr.JSON(label="Análisis de Enlaces")
                 links_plot = gr.Plot(label="Visualización de Enlaces Internos")
-            with gr.Tab("📂 Documentos"):
-                gr.Markdown("""
-                ### Documentos Encontrados
-                Los documentos descargados se guardan en la carpeta `content_storage/`
-                """)
-        # Función que genera el reporte y lo guarda en disco
-        def generate_report() -> Optional[str]:
             if analyzer.current_analysis:
                 report_path = "content_storage/seo_report.json"
-                try:
-                    with open(report_path, 'w', encoding='utf-8') as f:
-                        json.dump(analyzer.current_analysis, f, indent=2, ensure_ascii=False)
-                    return report_path
-                except Exception as e:
-                    logger.error(f"Error generando reporte: {e}")
-                    return None
-            return None
-        # Callback para generar gráfico de enlaces internos a partir del análisis almacenado
-        def generate_internal_links_plot(links_json: Dict) -> Any:
-            fig = analyzer._plot_internal_links(links_json)
-            return fig if fig is not None else {}
-        # Asignación de acciones a botones y otros eventos
         analyze_btn.click(
             fn=analyzer.analyze_sitemap,
             inputs=sitemap_input,
-            outputs=[stats_output, recommendations_output, content_output, links_output],
             show_progress=True
         )
         clear_btn.click(
-            fn=lambda: [None] * 4,
-            outputs=[stats_output, recommendations_output, content_output, links_output]
         )
         download_btn.click(
             fn=generate_report,
             outputs=gr.File(label="Descargar Reporte")
         )
         plot_btn.click(
-            fn=generate_internal_links_plot,
             inputs=links_output,
             outputs=links_plot
         )
     return interface
-def setup_spacy_model() -> None:
-    """
-    Verifica y descarga el modelo de spaCy 'es_core_news_lg' si no está instalado.
-    """
-    try:
-        spacy.load("es_core_news_lg")
-        logger.info("Modelo spaCy 'es_core_news_lg' cargado correctamente.")
-    except OSError:
-        logger.info("Descargando modelo spaCy 'es_core_news_lg'...")
-        try:
-            subprocess.run(
-                [sys.executable, "-m", "spacy", "download", "es_core_news_lg"],
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE
-            )
-            logger.info("Modelo descargado exitosamente.")
-        except subprocess.CalledProcessError as e:
-            logger.error(f"Error al descargar modelo: {e.stderr.decode()}")
-            raise RuntimeError("No se pudo descargar el modelo spaCy") from e
 if __name__ == "__main__":
     setup_spacy_model()
     app = create_interface()
-    app.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True,
-        share=False
-    )

+import gradio as gr
 import json
+from seo_analyzer import SEOSpaceAnalyzer
+import spacy
 import subprocess
 import sys
+import logging
+logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+def setup_spacy_model() -> None:
     """
+    Verifica y descarga el modelo de spaCy 'es_core_news_lg' si no está instalado.
     """
+    try:
+        spacy.load("es_core_news_lg")
+        logger.info("Modelo spaCy 'es_core_news_lg' cargado correctamente.")
+    except OSError:
+        logger.info("Descargando modelo spaCy 'es_core_news_lg'...")
         try:
+            subprocess.run(
+                [sys.executable, "-m", "spacy", "download", "es_core_news_lg"],
+                check=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE
+            )
+            logger.info("Modelo descargado exitosamente.")
+        except subprocess.CalledProcessError as e:
+            logger.error(f"Error al descargar modelo: {e.stderr.decode()}")
+            raise RuntimeError("No se pudo descargar el modelo spaCy") from e
 def create_interface() -> gr.Blocks:
     analyzer = SEOSpaceAnalyzer()
     with gr.Blocks(title="SEO Analyzer Pro", theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # 🕵️ SEO Analyzer Pro
         **Analizador SEO avanzado con modelos de lenguaje**
+        Ingresa la URL de un sitemap.xml para analizar el sitio web.
         """)
         with gr.Row():
             with gr.Column():
+                sitemap_input = gr.Textbox(
+                    label="URL del Sitemap",
+                    placeholder="https://ejemplo.com/sitemap.xml",
+                    interactive=True
+                )
                 analyze_btn = gr.Button("Analizar Sitio", variant="primary")
                 with gr.Row():
                     clear_btn = gr.Button("Limpiar")
                     plot_btn = gr.Button("Visualizar Enlaces Internos", variant="secondary")
             with gr.Column():
                 status_output = gr.Textbox(label="Estado del Análisis", interactive=False)
         with gr.Tabs():
             with gr.Tab("📊 Resumen"):
                 stats_output = gr.JSON(label="Estadísticas Generales")
                 recommendations_output = gr.JSON(label="Recomendaciones SEO")
             with gr.Tab("📝 Contenido"):
                 content_output = gr.JSON(label="Análisis de Contenido")
             with gr.Tab("🔗 Enlaces"):
                 links_output = gr.JSON(label="Análisis de Enlaces")
                 links_plot = gr.Plot(label="Visualización de Enlaces Internos")
+            with gr.Tab("📄 Detalles"):
+                details_output = gr.JSON(label="Detalles Individuales")
+        def generate_report() -> str:
             if analyzer.current_analysis:
                 report_path = "content_storage/seo_report.json"
+                with open(report_path, 'w', encoding='utf-8') as f:
+                    json.dump(analyzer.current_analysis, f, indent=2, ensure_ascii=False)
+                return report_path
+            return ""
+        def plot_internal_links(links_json: dict) -> any:
+            return analyzer.plot_internal_links(links_json)
         analyze_btn.click(
             fn=analyzer.analyze_sitemap,
             inputs=sitemap_input,
+            outputs=[stats_output, recommendations_output, content_output, links_output, details_output],
             show_progress=True
         )
         clear_btn.click(
+            fn=lambda: [None, None, None, None, None],
+            outputs=[stats_output, recommendations_output, content_output, links_output, details_output]
         )
         download_btn.click(
             fn=generate_report,
             outputs=gr.File(label="Descargar Reporte")
         )
         plot_btn.click(
+            fn=plot_internal_links,
             inputs=links_output,
             outputs=links_plot
         )
     return interface
 if __name__ == "__main__":
     setup_spacy_model()
     app = create_interface()
+    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True, share=False)