Updated Links and chronology
index.html CHANGED (+156 −129)

@@ -2,37 +2,53 @@
 <html>
 <head>
   <meta charset="utf-8">
-  <meta name="description" content="DeepSeek: Advancing Open-Source Language Models">
-  <meta name="keywords" content="DeepSeek, LLM, AI">
+  <meta name="description" content="DeepSeek Papers: Advancing Open-Source Language Models">
+  <meta name="keywords" content="DeepSeek, LLM, AI, Research">
   <meta name="viewport" content="width=device-width, initial-scale=1">
-  <title>DeepSeek: Advancing Open-Source Language Models</title>
+  <title>DeepSeek Papers: Advancing Open-Source Language Models</title>

   <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
-  <link rel="stylesheet" href="
-  <link rel="stylesheet" href="
-  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
-  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
-  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
-  <link rel="stylesheet" href="./static/css/index.css">
-  <link rel="icon" href="./static/images/favicon.svg">
+  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.3/css/bulma.min.css">
+  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">

-  <
-
-
-
-
+  <style>
+    .publication-title {
+      color: #363636;
+    }
+    .paper-card {
+      margin-bottom: 2rem;
+      transition: transform 0.2s;
+    }
+    .paper-card:hover {
+      transform: translateY(-5px);
+    }
+    .coming-soon-badge {
+      background-color: #3273dc;
+      color: white;
+      padding: 0.25rem 0.75rem;
+      border-radius: 4px;
+      font-size: 0.8rem;
+      margin-left: 1rem;
+    }
+    .paper-description {
+      color: #4a4a4a;
+      margin-top: 0.5rem;
+    }
+    .release-date {
+      color: #7a7a7a;
+      font-size: 0.9rem;
+    }
+  </style>
 </head>
 <body>

-  <section class="hero">
+  <section class="hero is-light">
     <div class="hero-body">
       <div class="container is-max-desktop">
         <div class="columns is-centered">
           <div class="column has-text-centered">
-            <h1 class="title is-1 publication-title">DeepSeek
-            <
-            A collection of groundbreaking research papers in AI and language models
-            </div>
+            <h1 class="title is-1 publication-title">DeepSeek Papers</h1>
+            <h2 class="subtitle is-3">Advancing Open-Source Language Models</h2>
           </div>
         </div>
       </div>
@@ -41,123 +57,143 @@

 <section class="section">
   <div class="container is-max-desktop">
-
-
-
-
-
-      <
-
-
-
-
-
-
-
-
-
-
-      <div class="column is-four-fifths">
-        <h2 class="title is-3">Research Papers</h2>
-
-        <!-- Paper 1 -->
-        <div class="publication-block">
-          <div class="publication-header">
-            <h3 class="title is-4">DeepSeekLLM: Scaling Open-Source Language Models with Longer-termism</h3>
-            <span class="tag is-primary is-medium">Deep Dive Coming Soon</span>
-            <div class="is-size-5 publication-authors">
-              Released: November 29, 2023
+    <div class="content">
+      <div class="columns is-centered">
+        <div class="column is-10">
+
+          <!-- Native Sparse Attention -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                <a href="https://arxiv.org/abs/2502.11089">Native Sparse Attention: Hardware-Aligned and Natively Trainable Sparse Attention</a>
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="release-date">Released: February 2025</p>
+              <p class="paper-description">
+                Introduces a new approach to sparse attention that is both hardware-efficient and natively trainable,
+                improving the performance of large language models.
+              </p>
            </div>
          </div>
-          <div class="content has-text-justified">
-            <p>This foundational paper explores scaling laws and the trade-offs between data and model size,
-            establishing the groundwork for subsequent models.</p>
-          </div>
-        </div>

-
-
-
-
-
-
-
+          <!-- DeepSeek-R1 -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="release-date">Released: January 20, 2025</p>
+              <p class="paper-description">
+                The R1 model builds on previous work to enhance reasoning capabilities through large-scale
+                reinforcement learning, competing directly with leading models like OpenAI's o1.
+              </p>
            </div>
          </div>
-          <div class="content has-text-justified">
-            <p>Introduces a Mixture-of-Experts (MoE) architecture, enhancing performance while reducing
-            training costs by 42%.</p>
-          </div>
-        </div>

-
-
-
-
-
-
-
+          <!-- DeepSeek-V3 -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                DeepSeek-V3 Technical Report
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="release-date">Released: December 2024</p>
+              <p class="paper-description">
+                Discusses the scaling of sparse MoE networks to 671 billion parameters, utilizing mixed precision
+                training and high-performance computing (HPC) co-design strategies.
+              </p>
            </div>
          </div>
-          <div class="content has-text-justified">
-            <p>Discusses the scaling of sparse MoE networks to 671 billion parameters.</p>
-          </div>
-        </div>

-
-        <div class="
-        <
-
-
-
+          <!-- DeepSeek-V2 -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="release-date">Released: May 2024</p>
+              <p class="paper-description">
+                Introduces a Mixture-of-Experts (MoE) architecture, enhancing performance while reducing
+                training costs by 42%. Emphasizes strong performance characteristics and efficiency improvements.
+              </p>
            </div>
          </div>
-          <div class="content has-text-justified">
-            <p>Enhances reasoning capabilities through large-scale reinforcement learning.</p>
-          </div>
-        </div>

-
-        <div class="
-        <
-
-
-
+          <!-- DeepSeekMath -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="release-date">Released: April 2024</p>
+              <p class="paper-description">
+                This paper presents methods to improve mathematical reasoning in LLMs, introducing the
+                Group Relative Policy Optimization (GRPO) algorithm during reinforcement learning stages.
+              </p>
            </div>
          </div>
-          <div class="content has-text-justified">
-            <p>Presents methods to improve mathematical reasoning in LLMs.</p>
-          </div>
-        </div>

-
-        <div class="
-        <
-
-
-
-
+          <!-- DeepSeekLLM -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                DeepSeekLLM: Scaling Open-Source Language Models with Longer-termism
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="release-date">Released: November 29, 2023</p>
+              <p class="paper-description">
+                This foundational paper explores scaling laws and the trade-offs between data and model size,
+                establishing the groundwork for subsequent models.
+              </p>
+            </div>
          </div>
-        </div>

-
-
-
-      <
-
-
-
+          <!-- Papers without specific dates -->
+          <!-- DeepSeek-Prover -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                DeepSeek-Prover: Advancing Theorem Proving in LLMs through Large-Scale Synthetic Data
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="paper-description">
+                Focuses on enhancing theorem proving capabilities in language models using synthetic data
+                for training, establishing new benchmarks in automated mathematical reasoning.
+              </p>
+            </div>
          </div>
-        </div>

-
-        <div class="
-        <
-
+          <!-- DeepSeek-Coder-V2 -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                DeepSeek-Coder-V2: Breaking the Barrier of Closed-Source Models in Code Intelligence
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="paper-description">
+                This paper details advancements in code-related tasks with an emphasis on open-source
+                methodologies, improving upon earlier coding models with enhanced capabilities.
+              </p>
+            </div>
          </div>
-
-
+
+          <!-- DeepSeekMoE -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                DeepSeekMoE: Advancing Mixture-of-Experts Architecture
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="paper-description">
+                Discusses the integration and benefits of the Mixture-of-Experts approach within the
+                DeepSeek framework, focusing on scalability and efficiency improvements.
+              </p>
+            </div>
          </div>
+
        </div>
      </div>
    </div>
@@ -167,19 +203,10 @@
 <footer class="footer">
   <div class="container">
     <div class="content has-text-centered">
-      <
-      <
-
-
-      <div class="columns is-centered">
-        <div class="column is-8">
-          <div class="content">
-            <p>
-              This website is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">Creative
-              Commons Attribution-ShareAlike 4.0 International License</a>.
-            </p>
-          </div>
-        </div>
+      <p>
+        This website is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">
+        Creative Commons Attribution-ShareAlike 4.0 International License</a>.
+      </p>
    </div>
  </div>
</footer>
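
Note on the card hover pattern the new stylesheet introduces: `.paper-card` sets `transition: transform 0.2s` so that the `translateY(-5px)` applied on `:hover` animates smoothly instead of jumping, and because `transform` does not trigger reflow, neighboring cards stay in place. A minimal, self-contained sketch of the same pattern follows; the card title and text are placeholders, not content from the site, and the only assumption is the Bulma CDN build already linked in the diff.

<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <!-- Bulma provides the base .card and .card-content component styles -->
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.3/css/bulma.min.css">
  <style>
    .paper-card {
      margin-bottom: 2rem;
      transition: transform 0.2s; /* animate transform changes over 200 ms */
    }
    .paper-card:hover {
      transform: translateY(-5px); /* lift the card without reflowing its neighbors */
    }
  </style>
</head>
<body>
  <section class="section">
    <!-- Placeholder card; the real cards above carry paper titles and descriptions -->
    <div class="card paper-card">
      <div class="card-content">
        <h3 class="title is-4">Example paper title</h3>
        <p>Example description.</p>
      </div>
    </div>
  </section>
</body>
</html>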