PapersImpact

Running on Zero

App Files Files Community

openfree committed 13 days ago

Commit

3643f99

verified ·

1 Parent(s): 822a9a7

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -199

app.py CHANGED Viewed

@@ -9,29 +9,30 @@ import requests
 from urllib.parse import urlparse
 import xml.etree.ElementTree as ET
-# Model repository path and device selection
 model_path = "ssocean/NAIP"
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Global model/tokenizer variables
 model = None
 tokenizer = None
 def fetch_arxiv_paper(arxiv_input):
     """
-    Fetch paper details (title, abstract) from an arXiv URL or ID using requests.
     """
     try:
-        # If user passed a full arxiv.org link, parse out the ID
         if "arxiv.org" in arxiv_input:
             parsed = urlparse(arxiv_input)
             path = parsed.path
             arxiv_id = path.split("/")[-1].replace(".pdf", "")
         else:
-            # Otherwise just use the raw ID
             arxiv_id = arxiv_input.strip()
-        # ArXiv API query
         api_url = f"http://export.arxiv.org/api/query?id_list={arxiv_id}"
         resp = requests.get(api_url)
         if resp.status_code != 200:
@@ -39,64 +40,71 @@ def fetch_arxiv_paper(arxiv_input):
                 "title": "",
                 "abstract": "",
                 "success": False,
-                "message": "Error fetching paper from arXiv API"
             }
-        # Parse XML response
         root = ET.fromstring(resp.text)
         ns = {"arxiv": "http://www.w3.org/2005/Atom"}
         entry = root.find(".//arxiv:entry", ns)
         if entry is None:
-            return {
-                "title": "",
-                "abstract": "",
-                "success": False,
-                "message": "Paper not found"
-            }
         title = entry.find("arxiv:title", ns).text.strip()
         abstract = entry.find("arxiv:summary", ns).text.strip()
         return {
             "title": title,
             "abstract": abstract,
             "success": True,
-            "message": "Paper fetched successfully!"
         }
     except Exception as e:
         return {
             "title": "",
             "abstract": "",
             "success": False,
-            "message": f"Error fetching paper: {e}"
         }
 @spaces.GPU(duration=60, enable_queue=True)
 def predict(title, abstract):
     """
-    Predict a normalized academic impact score (0–1) given the paper title & abstract.
-    Loads the model once globally, then uses it for inference.
     """
     global model, tokenizer
     if model is None:
-        # Load model config, disable quantization, and set number of labels if needed
         config = AutoConfig.from_pretrained(model_path)
-        config.quantization_config = None
-        config.num_labels = 1  # For classification/logit output
-        # IMPORTANT: Do not pass num_labels directly into from_pretrained for LLaMA-based models
-        model = AutoModelForSequenceClassification.from_pretrained(
             model_path,
-            config=config,
-            torch_dtype=torch.float32,  # Use full-precision float32
-            device_map=None,           # We'll move it manually
             low_cpu_mem_usage=False
         )
-        model.to(device)
-        model.eval()
-        tokenizer = AutoTokenizer.from_pretrained(model_path)
     text = (
         f"Given a certain paper,\n"
         f"Title: {title.strip()}\n"
@@ -108,17 +116,20 @@ def predict(title, abstract):
         inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
         inputs = {k: v.to(device) for k, v in inputs.items()}
         with torch.no_grad():
-            output = model(**inputs)
-        logits = output.logits
         prob = torch.sigmoid(logits).item()
-        score = min(1.0, prob + 0.05)  # +0.05 offset, capped at 1.0
         return round(score, 4)
     except Exception as e:
-        print(f"Prediction error: {e}")
-        return 0.0  # Return 0 in case of any error
 def get_grade_and_emoji(score):
-    """Convert a 0–1 score into a tier grade with emoji indicator."""
     if score >= 0.900: return "AAA 🌟"
     if score >= 0.800: return "AA ⭐"
     if score >= 0.650: return "A ✨"
@@ -129,9 +140,13 @@ def get_grade_and_emoji(score):
     if score >= 0.300: return "CC ✏️"
     return "C 📑"
 def validate_input(title, abstract):
     """
-    Ensure title >=3 words, abstract >=50 words, and only ASCII chars.
     """
     non_ascii = re.compile(r"[^\x00-\x7F]")
     if len(title.split()) < 3:
@@ -145,90 +160,60 @@ def validate_input(title, abstract):
     return True, "Inputs look good."
 def update_button_status(title, abstract):
     valid, msg = validate_input(title, abstract)
     if not valid:
         return gr.update(value="Error: " + msg), gr.update(interactive=False)
     return gr.update(value=msg), gr.update(interactive=True)
 def process_arxiv_input(arxiv_input):
     """
-    Helper to fill in title/abstract fields from an arXiv link/ID.
     """
     if not arxiv_input.strip():
         return "", "", "Please enter an arXiv URL or ID"
-    result = fetch_arxiv_paper(arxiv_input)
-    if result["success"]:
-        return result["title"], result["abstract"], result["message"]
-    return "", "", result["message"]
-# Custom CSS for styling
 css = """
 .gradio-container { font-family: Arial, sans-serif; }
 .main-title {
-    text-align: center;
-    color: #2563eb;
-    font-size: 2.5rem !important;
-    margin-bottom: 1rem !important;
-    background: linear-gradient(45deg, #2563eb, #1d4ed8);
-    -webkit-background-clip: text;
-    -webkit-text-fill-color: transparent;
-}
-.sub-title {
-    text-align: center;
-    color: #4b5563;
-    font-size: 1.5rem !important;
-    margin-bottom: 2rem !important;
 }
 .input-section {
-    background: white;
-    padding: 2rem;
-    border-radius: 1rem;
-    box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);
 }
 .result-section {
-    background: #f8fafc;
-    padding: 2rem;
-    border-radius: 1rem;
-    margin-top: 2rem;
-}
-.methodology-section {
-    background: #ecfdf5;
-    padding: 2rem;
-    border-radius: 1rem;
-    margin-top: 2rem;
-}
-.example-section {
-    background: #fff7ed;
-    padding: 2rem;
-    border-radius: 1rem;
-    margin-top: 2rem;
 }
 .grade-display {
-    font-size: 3rem;
-    text-align: center;
-    margin: 1rem 0;
 }
 .arxiv-input {
-    margin-bottom: 1.5rem;
-    padding: 1rem;
-    background: #f3f4f6;
-    border-radius: 0.5rem;
 }
 .arxiv-link {
-    color: #2563eb;
-    text-decoration: underline;
-    font-size: 0.9em;
-    margin-top: 0.5em;
-}
-.arxiv-note {
-    color: #666;
-    font-size: 0.9em;
-    margin-top: 0.5em;
-    margin-bottom: 0.5em;
 }
 """
-# Example papers
 example_papers = [
     {
         "title": "Attention Is All You Need",
@@ -242,7 +227,7 @@ example_papers = [
             "parallelizable and requiring significantly less time to train."
         ),
         "score": 0.982,
-        "note": "💫 Revolutionary paper that introduced the Transformer architecture."
     },
     {
         "title": "Language Models are Few-Shot Learners",
@@ -252,13 +237,13 @@ example_papers = [
             "typically task-agnostic in architecture, this method still requires task-specific "
             "fine-tuning datasets of thousands or tens of thousands of examples. By contrast, humans "
             "can generally perform a new language task from only a few examples or from simple "
-            "instructions - something which current NLP systems still largely struggle to do. Here we "
             "show that scaling up language models greatly improves task-agnostic, few-shot "
             "performance, sometimes even reaching competitiveness with prior state-of-the-art "
             "fine-tuning approaches."
         ),
         "score": 0.956,
-        "note": "🚀 Groundbreaking GPT-3 paper that demonstrated the power of large language models."
     },
     {
         "title": "An Empirical Study of Neural Network Training Protocols",
@@ -270,135 +255,74 @@ example_papers = [
             "insights for deep learning practitioners."
         ),
         "score": 0.623,
-        "note": "📚 Solid research paper with useful findings but more limited scope and impact."
     }
 ]
-# Build Gradio interface
 with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
-    gr.Markdown(
-        """
-        # Papers Impact: AI-Powered Research Impact Predictor
-        ## https://discord.gg/openfreeai
-        """
-    )
-    gr.HTML("""<a href="https://visitorbadge.io/status?path=https%3A%2F%2FVIDraft-PaperImpact.hf.space">
-<img src="https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2FVIDraft-PaperImpact.hf.space&countColor=%23263759" />
-</a>""")
     with gr.Row():
         with gr.Column(elem_classes="input-section"):
-            # arXiv import group
             with gr.Group(elem_classes="arxiv-input"):
-                gr.Markdown("### 📑 Import from arXiv")
                 arxiv_input = gr.Textbox(
                     lines=1,
-                    placeholder="Enter arXiv URL or ID (e.g., 2504.11651)",
-                    label="arXiv Paper URL/ID",
                     value="2504.11651"
                 )
-                gr.Markdown(
-                    """
-                    <p class="arxiv-note">
-                      Click input field to use example paper or browse papers at
-                      <a href="https://arxiv.org" target="_blank" class="arxiv-link">arxiv.org</a>
-                    </p>
-                    """
-                )
-                fetch_button = gr.Button("🔍 Fetch Paper Details", variant="secondary")
-            gr.Markdown("### 📝 Or Enter Paper Details Manually")
             title_input = gr.Textbox(
                 lines=2,
-                placeholder="Enter Paper Title (minimum 3 words)...",
                 label="Paper Title"
             )
-            abstract_input = gr.Textbox(
                 lines=5,
-                placeholder="Enter Paper Abstract (minimum 50 words)...",
                 label="Paper Abstract"
             )
-            validation_status = gr.Textbox(label="✔️ Validation Status", interactive=False)
-            submit_button = gr.Button("🎯 Predict Impact", interactive=False, variant="primary")
         with gr.Column(elem_classes="result-section"):
-            with gr.Group():
-                score_output = gr.Number(label="🎯 Impact Score")
-                grade_output = gr.Textbox(label="🏆 Grade", value="", elem_classes="grade-display")
-    with gr.Row(elem_classes="methodology-section"):
-        gr.Markdown(
-            """
-            ### 🔬 Scientific Methodology
-            - **Training Data**: Model trained on extensive dataset of published papers from CS.CV, CS.CL(NLP), and CS.AI fields
-            - **Optimization**: NDCG optimization with Sigmoid activation and MSE loss function
-            - **Validation**: Cross-validated against historical paper impact data
-            - **Architecture**: Advanced transformer-based deep textual analysis
-            - **Metrics**: Quantitative analysis of citation patterns and research influence
-            """
-        )
-    with gr.Row():
-        gr.Markdown(
-            """
-            ### 📊 Rating Scale
-            | Grade | Score Range | Description | Indicator |
-            |-------|-------------|-------------|-----------|
-            | AAA | 0.900-1.000 | Exceptional Impact | 🌟 |
-            | AA | 0.800-0.899 | Very High Impact | ⭐ |
-            | A | 0.650-0.799 | High Impact | ✨ |
-            | BBB | 0.600-0.649 | Above Average Impact | 🔵 |
-            | BB | 0.550-0.599 | Moderate Impact | 📘 |
-            | B | 0.500-0.549 | Average Impact | 📖 |
-            | CCC | 0.400-0.499 | Below Average Impact | 📝 |
-            | CC | 0.300-0.399 | Low Impact | ✏️ |
-            | C | < 0.299 | Limited Impact | 📑 |
-            """
-        )
-    with gr.Row(elem_classes="example-section"):
-        gr.Markdown("### 📋 Example Papers")
-        for paper in example_papers:
-            gr.Markdown(
-                f"""
-                #### {paper['title']}
-                **Score**: {paper.get('score', 'N/A')} | **Grade**: {get_grade_and_emoji(paper.get('score', 0))}
-                {paper['abstract']}
-                *{paper['note']}*
-                ---
-                """
-            )
-    # Validate button status on input changes
-    title_input.change(
-        update_button_status,
-        inputs=[title_input, abstract_input],
-        outputs=[validation_status, submit_button]
-    )
-    abstract_input.change(
-        update_button_status,
-        inputs=[title_input, abstract_input],
-        outputs=[validation_status, submit_button]
-    )
-    # Fetch from arXiv
-    fetch_button.click(
-        process_arxiv_input,
-        inputs=[arxiv_input],
-        outputs=[title_input, abstract_input, validation_status]
-    )
-    # Predict callback
-    def process_prediction(title, abstract):
-        score = predict(title, abstract)
-        grade = get_grade_and_emoji(score)
-        return score, grade
-    submit_button.click(
-        process_prediction,
-        inputs=[title_input, abstract_input],
-        outputs=[score_output, grade_output]
-    )
 if __name__ == "__main__":
     iface.launch()

 from urllib.parse import urlparse
 import xml.etree.ElementTree as ET
+##################################################
+# Global setup
+#
+# model_path: model repository path handed to the from_pretrained() calls
+#             in predict().
+# device:     "cuda" when torch reports an available GPU, otherwise "cpu".
+# model / tokenizer: start as None; predict() loads both the first time it
+#             runs and stores them here so later calls skip the load.
+##################################################
 model_path = "ssocean/NAIP"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model = None
 tokenizer = None
+##################################################
+# Fetch paper info from arXiv
+##################################################
 def fetch_arxiv_paper(arxiv_input):
     """
+    Fetch paper title & abstract from an arXiv URL or ID.
     """
     try:
         if "arxiv.org" in arxiv_input:
             parsed = urlparse(arxiv_input)
             path = parsed.path
             arxiv_id = path.split("/")[-1].replace(".pdf", "")
         else:
             arxiv_id = arxiv_input.strip()
         api_url = f"http://export.arxiv.org/api/query?id_list={arxiv_id}"
         resp = requests.get(api_url)
         if resp.status_code != 200:
                 "title": "",
                 "abstract": "",
                 "success": False,
+                "message": "Error fetching paper from arXiv API",
             }
         root = ET.fromstring(resp.text)
         ns = {"arxiv": "http://www.w3.org/2005/Atom"}
         entry = root.find(".//arxiv:entry", ns)
         if entry is None:
+            return {"title": "", "abstract": "", "success": False, "message": "Paper not found"}
         title = entry.find("arxiv:title", ns).text.strip()
         abstract = entry.find("arxiv:summary", ns).text.strip()
         return {
             "title": title,
             "abstract": abstract,
             "success": True,
+            "message": "Paper fetched successfully!",
         }
     except Exception as e:
         return {
             "title": "",
             "abstract": "",
             "success": False,
+            "message": f"Error fetching paper: {e}",
         }
+##################################################
+# Prediction function
+##################################################
 @spaces.GPU(duration=60, enable_queue=True)
 def predict(title, abstract):
     """
+    Predict a normalized academic impact score (0–1) from title & abstract.
     """
     global model, tokenizer
     if model is None:
+        # 1) Load config
         config = AutoConfig.from_pretrained(model_path)
+        # 2) Remove quantization_config if it exists to avoid `NoneType` error in PEFT
+        #    This ensures that 'quantization_config.to_dict()' won't be called
+        if hasattr(config, "quantization_config"):
+            del config.quantization_config
+        # 3) (Optional) We can still set config.num_labels = 1 if needed
+        config.num_labels = 1
+        # 4) Load the model
+        model_loaded = AutoModelForSequenceClassification.from_pretrained(
             model_path,
+            config=config,            # pass config
+            torch_dtype=torch.float32,
+            device_map=None,          # manual device
             low_cpu_mem_usage=False
         )
+        model_loaded.to(device)
+        model_loaded.eval()
+        # 5) Load tokenizer
+        tokenizer_loaded = AutoTokenizer.from_pretrained(model_path)
+        # Assign to globals
+        model, tokenizer = model_loaded, tokenizer_loaded
+    # Construct the input text prompt
     text = (
         f"Given a certain paper,\n"
         f"Title: {title.strip()}\n"
         inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
         inputs = {k: v.to(device) for k, v in inputs.items()}
         with torch.no_grad():
+            outputs = model(**inputs)
+        logits = outputs.logits
         prob = torch.sigmoid(logits).item()
+        score = min(1.0, prob + 0.05)
         return round(score, 4)
     except Exception as e:
+        print("Prediction error:", e)
+        return 0.0
+##################################################
+# Grading
+##################################################
 def get_grade_and_emoji(score):
+    """Map a 0–1 score to an A/B/C style grade with an emoji."""
     if score >= 0.900: return "AAA 🌟"
     if score >= 0.800: return "AA ⭐"
     if score >= 0.650: return "A ✨"
     if score >= 0.300: return "CC ✏️"
     return "C 📑"
+##################################################
+# Validation
+##################################################
 def validate_input(title, abstract):
     """
+    Ensure the title has at least 3 words, the abstract at least 50,
+    and check for ASCII-only characters.
     """
     non_ascii = re.compile(r"[^\x00-\x7F]")
     if len(title.split()) < 3:
     return True, "Inputs look good."
 def update_button_status(title, abstract):
+    """Enable or disable the predict button based on validation.
+
+    Runs validate_input(title, abstract) and returns two gr.update(...)
+    objects, in this order:
+      1. the new text for the validation status box (the validation message,
+         prefixed with "Error: " when validation fails);
+      2. the `interactive` flag for the predict button (True only when the
+         inputs are valid).
+    """
     valid, msg = validate_input(title, abstract)
     if not valid:
         return gr.update(value="Error: " + msg), gr.update(interactive=False)
     return gr.update(value=msg), gr.update(interactive=True)
+##################################################
+# Process arXiv input
+##################################################
 def process_arxiv_input(arxiv_input):
     """
+    Called when user clicks 'Fetch Paper Details' to fill in title/abstract from arXiv.
+
+    Returns a 3-tuple (title, abstract, status message). Blank or
+    whitespace-only input returns early with a prompt message and never
+    calls fetch_arxiv_paper. When the fetch reports failure, title and
+    abstract are empty strings and the message is the one supplied by
+    fetch_arxiv_paper.
     """
+    # Guard: nothing to look up, so skip the network request entirely.
     if not arxiv_input.strip():
         return "", "", "Please enter an arXiv URL or ID"
+    res = fetch_arxiv_paper(arxiv_input)
+    if res["success"]:
+        return res["title"], res["abstract"], res["message"]
+    return "", "", res["message"]
+##################################################
+# Custom CSS
+#
+# Passed to gr.Blocks(css=...). The selectors correspond to class names used
+# by the interface: .main-title (the header <div>) and the elem_classes
+# values input-section, result-section, grade-display and arxiv-input.
+# NOTE(review): .arxiv-link is not referenced by the interface code visible
+# in this diff - confirm whether the rule is still needed.
+##################################################
 css = """
 .gradio-container { font-family: Arial, sans-serif; }
 .main-title {
+    text-align: center; color: #2563eb; font-size: 2.5rem!important;
+    margin-bottom:1rem!important;
+    background: linear-gradient(45deg,#2563eb,#1d4ed8);
+    -webkit-background-clip: text; -webkit-text-fill-color: transparent;
 }
 .input-section {
+    background:#fff; padding:1.5rem; border-radius:0.5rem;
+    box-shadow:0 4px 6px rgba(0,0,0,0.1);
 }
 .result-section {
+    background:#f7f9fc; padding:1.5rem; border-radius:0.5rem;
+    margin-top:2rem;
 }
 .grade-display {
+    font-size:2.5rem; text-align:center; margin-top:1rem;
 }
 .arxiv-input {
+    margin-bottom:1.5rem; padding:1rem; background:#f3f4f6;
+    border-radius:0.5rem;
 }
 .arxiv-link {
+    color:#2563eb; text-decoration: underline;
 }
 """
+##################################################
+# Example Papers
+##################################################
 example_papers = [
     {
         "title": "Attention Is All You Need",
             "parallelizable and requiring significantly less time to train."
         ),
         "score": 0.982,
+        "note": "Revolutionary paper that introduced the Transformer architecture."
     },
     {
         "title": "Language Models are Few-Shot Learners",
             "typically task-agnostic in architecture, this method still requires task-specific "
             "fine-tuning datasets of thousands or tens of thousands of examples. By contrast, humans "
             "can generally perform a new language task from only a few examples or from simple "
+            "instructions—something which current NLP systems still largely struggle to do. Here we "
             "show that scaling up language models greatly improves task-agnostic, few-shot "
             "performance, sometimes even reaching competitiveness with prior state-of-the-art "
             "fine-tuning approaches."
         ),
         "score": 0.956,
+        "note": "Groundbreaking GPT-3 paper on few-shot learning."
     },
     {
         "title": "An Empirical Study of Neural Network Training Protocols",
             "insights for deep learning practitioners."
         ),
         "score": 0.623,
+        "note": "Solid empirical comparison of training protocols."
     }
 ]
+##################################################
+# Build the Gradio Interface
+##################################################
 with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
+    # Page header: the title <div> is styled by the .main-title CSS rule,
+    # followed by a one-line description of what the app does.
+    gr.Markdown("<div class='main-title'>Papers Impact: AI-Powered Research Impact Predictor</div>")
+    gr.Markdown("**Predict the potential research impact (0–1) from title & abstract.**")
+    # Row with input column + output column
     with gr.Row():
         with gr.Column(elem_classes="input-section"):
+            gr.Markdown("### Import from arXiv")
             with gr.Group(elem_classes="arxiv-input"):
+                # The ID box is pre-filled (value=...) with a default arXiv ID.
                 arxiv_input = gr.Textbox(
                     lines=1,
+                    placeholder="e.g. 2504.11651",
+                    label="arXiv URL or ID",
                     value="2504.11651"
                 )
+                fetch_btn = gr.Button("🔍 Fetch Paper Details", variant="secondary")
+            gr.Markdown("### Or Enter Manually")
             title_input = gr.Textbox(
                 lines=2,
+                placeholder="Paper title (≥3 words)...",
                 label="Paper Title"
             )
+            abs_input = gr.Textbox(
                 lines=5,
+                placeholder="Paper abstract (≥50 words)...",
                 label="Paper Abstract"
             )
+            # The status box is read-only and the predict button starts
+            # disabled; update_button_status (wired below) writes the
+            # validation message and flips the button's interactive flag.
+            status_box = gr.Textbox(label="Validation Status", interactive=False)
+            predict_btn = gr.Button("🎯 Predict Impact", interactive=False, variant="primary")
         with gr.Column(elem_classes="result-section"):
+            score_box = gr.Number(label="Impact Score")
+            grade_box = gr.Textbox(label="Grade", elem_classes="grade-display")
+    # Validation triggers: re-validate whenever either field changes. Both
+    # handlers read the title and the abstract and update the status text
+    # and the predict button.
+    title_input.change(update_button_status, [title_input, abs_input], [status_box, predict_btn])
+    abs_input.change(update_button_status, [title_input, abs_input], [status_box, predict_btn])
+    # arXiv fetch: the 3-tuple returned by process_arxiv_input fills the
+    # title field, the abstract field and the status box, in that order.
+    fetch_btn.click(process_arxiv_input, [arxiv_input], [title_input, abs_input, status_box])
+    # Predict handler
+    def run_predict(t, a):
+        """Score title `t` / abstract `a`; return (score, grade label) for the two result boxes."""
+        s = predict(t, a)
+        return s, get_grade_and_emoji(s)
+    predict_btn.click(run_predict, [title_input, abs_input], [score_box, grade_box])
+    # Example papers: rendered as static Markdown while the UI is built. The
+    # score shown is the hard-coded value stored in example_papers (predict()
+    # is not called here); the grade is derived from it by get_grade_and_emoji.
+    gr.Markdown("### Example Papers")
+    for paper in example_papers:
+        gr.Markdown(
+            f"**{paper['title']}**  \n"
+            f"Score: {paper['score']} | Grade: {get_grade_and_emoji(paper['score'])}  \n"
+            f"{paper['abstract']}  \n"
+            f"*{paper['note']}*\n---"
+        )
+##################################################
+# Launch
+#
+# Start the Gradio app only when this file is executed as a script, so
+# importing the module does not call iface.launch().
+##################################################
 if __name__ == "__main__":
     iface.launch()