Spaces:

gnaw05
/

compare_docs

Sleeping

App Files Community

gnaw05 committed 2 days ago

Commit

96d8c6a

verified ·

1 Parent(s): ce9dc81

u

Browse files

Files changed (1) hide show

app.py +25 -19

app.py CHANGED Viewed

@@ -14,7 +14,6 @@ st.set_page_config(page_title="📑 Contract Analyzer", layout="wide")
 # ========== FUNCTIONS ==========
 # Tải mô hình Hugging Face từ Hub
-@st.cache_resource
 def load_inference_client():
     return InferenceClient(repo_id="HuggingFaceH4/zephyr-7b-beta")  # Mô hình Zephyr
@@ -23,19 +22,23 @@ inference_client = load_inference_client()
 def extract_text_from_pdf(uploaded_file):
     try:
         with pdfplumber.open(uploaded_file) as pdf:
-            return "\n".join(page.extract_text() or "" for page in pdf.pages)
-    except:
-        try:
-            reader = PyPDF4.PdfFileReader(uploaded_file)
-            return "\n".join([reader.getPage(i).extractText() for i in range(reader.numPages)])
-        except Exception as e:
-            st.error(f"Error reading PDF: {e}")
-            return ""
 def load_text(file):
     if not file:
         return ""
     try:
         ext = file.name.split('.')[-1].lower()
         if ext == 'txt':
             return StringIO(file.getvalue().decode("utf-8")).read()
@@ -79,7 +82,11 @@ def query_zephyr_model(text1, text2, question):
     prompt = f"Compare the following two contracts and answer the question:\nText 1: {text1}\nText 2: {text2}\nQuestion: {question}"
     try:
         result = inference_client(inputs=prompt)
-        return result['generated_text']
     except Exception as e:
         st.error(f"Error querying the model: {e}")
         return None
@@ -129,16 +136,15 @@ def main():
     user_question = st.text_input("Enter your question about the contracts:")
     if user_question and st.button("Analyze Question"):
-        col = st.columns(1)
-        with col:
-            st.subheader("Answer from Document")
-            with st.spinner("Analyzing..."):
-                try:
-                    pred = query_zephyr_model(text1, text2, user_question)
                     st.success(pred)
-                except Exception as e:
-                    st.error(f"Failed on Document: {e}")
 if __name__ == "__main__":
     main()

 # ========== FUNCTIONS ==========
 # Tải mô hình Hugging Face từ Hub
 def load_inference_client():
     return InferenceClient(repo_id="HuggingFaceH4/zephyr-7b-beta")  # Mô hình Zephyr
 def extract_text_from_pdf(uploaded_file):
     try:
         with pdfplumber.open(uploaded_file) as pdf:
+            text = "\n".join(page.extract_text() or "" for page in pdf.pages)
+            if not text.strip():
+                raise ValueError("No extractable text found in the PDF")
+            return text
+    except Exception as e:
+        st.error(f"Error reading PDF: {e}")
+        return ""
 def load_text(file):
     if not file:
         return ""
     try:
+        # Check file size (e.g., limit to 10MB)
+        if file.size > 10 * 1024 * 1024:  # 10MB
+            st.warning("File is too large. Please upload a smaller file.")
+            return ""
         ext = file.name.split('.')[-1].lower()
         if ext == 'txt':
             return StringIO(file.getvalue().decode("utf-8")).read()
     prompt = f"Compare the following two contracts and answer the question:\nText 1: {text1}\nText 2: {text2}\nQuestion: {question}"
     try:
         result = inference_client(inputs=prompt)
+        if 'generated_text' in result:
+            return result['generated_text']
+        else:
+            st.error("No generated text found in the response.")
+            return None
     except Exception as e:
         st.error(f"Error querying the model: {e}")
         return None
     user_question = st.text_input("Enter your question about the contracts:")
     if user_question and st.button("Analyze Question"):
+        with st.spinner("Analyzing..."):
+            try:
+                pred = query_zephyr_model(text1, text2, user_question)
+                if pred:
                     st.success(pred)
+                else:
+                    st.error("Failed to get a valid answer from the model.")
+            except Exception as e:
+                st.error(f"Failed on Document: {e}")
 if __name__ == "__main__":
     main()